1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
44WebAssemblyTargetLowering::WebAssemblyTargetLowering(
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
91 setOperationAction(ISD::LOAD, T, Custom);
92 setOperationAction(ISD::STORE, T, Custom);
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
97 setOperationAction(ISD::LOAD, T, Custom);
98 setOperationAction(ISD::STORE, T, Custom);
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
103 setOperationAction(ISD::STORE, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
109 setOperationAction(ISD::LOAD, T, Custom);
110 setOperationAction(ISD::STORE, T, Custom);
111 }
112 }
113
119 setOperationAction(ISD::BRIND, MVT::Other, Custom);
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we custom-lower it.
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction(ISD::VAARG, MVT::Other, Expand);
126 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
127 setOperationAction(ISD::VAEND, MVT::Other, Expand);
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
140 for (auto Op :
141 {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
143 // Mark as supported the floating-point library function operators that would
144 // otherwise default to expand.
145 for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
146 ISD::FRINT, ISD::FROUNDEVEN})
148 // Support minimum and maximum, which otherwise default to expand.
149 setOperationAction(ISD::FMINIMUM, T, Legal);
150 setOperationAction(ISD::FMAXIMUM, T, Legal);
151 // When experimental v8f16 support is enabled these instructions don't need
152 // to be expanded.
153 if (T != MVT::v8f16) {
154 setOperationAction(ISD::FP16_TO_FP, T, Expand);
155 setOperationAction(ISD::FP_TO_FP16, T, Expand);
156 }
158 setTruncStoreAction(T, MVT::f16, Expand);
159 }
160
161 // Expand unavailable integer operations.
162 for (auto Op :
166 for (auto T : {MVT::i32, MVT::i64})
168 if (Subtarget->hasSIMD128())
169 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
171 }
172
173 if (Subtarget->hasWideArithmetic()) {
179 }
180
181 if (Subtarget->hasNontrappingFPToInt())
183 for (auto T : {MVT::i32, MVT::i64})
185
186 if (Subtarget->hasRelaxedSIMD()) {
188 {ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM},
189 {MVT::v4f32, MVT::v2f64}, Legal);
190 }
191 // SIMD-specific configuration
192 if (Subtarget->hasSIMD128()) {
193
195
196 // Combine wide-vector muls, with extend inputs, to extmul_half.
198
199 // Combine vector mask reductions into alltrue/anytrue
201
202 // Convert vector-to-integer bitcasts to bitmask operations
203 setTargetDAGCombine(ISD::BITCAST);
204
205 // Hoist bitcasts out of shuffles
207
208 // Combine extends of extract_subvectors into widening ops
210
211 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
212 // conversion ops
215
216 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
217 // into conversion ops
221
223
224 // Support saturating add/sub for i8x16 and i16x8
226 for (auto T : {MVT::v16i8, MVT::v8i16})
228
229 // Support integer abs
230 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
232
233 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
234 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
235 MVT::v2f64})
237
238 if (Subtarget->hasFP16())
240
241 // We have custom shuffle lowering to expose the shuffle mask
242 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
243 MVT::v2f64})
245
246 if (Subtarget->hasFP16())
248
249 // Support splatting
250 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
251 MVT::v2f64})
253
254 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
255
256 // Custom lowering since wasm shifts must have a scalar shift amount
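// (For example, i32x4.shl takes its shift amount as a scalar i32 operand
// rather than a vector of per-lane amounts.)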
257 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
258 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
260
261 // Custom lower lane accesses to expand out variable indices
263 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
264 MVT::v2f64})
266
267 // There is no i8x16.mul instruction
268 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
269
270 // There is no vector conditional select instruction
271 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
272 MVT::v2f64})
274
275 // Expand integer operations supported for scalars but not SIMD
276 for (auto Op :
278 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
280
281 // But we do have integer min and max operations
282 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
283 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
285
286 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
287 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
288 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
289 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
290
291 // Custom lower bit counting operations for other types to scalarize them.
292 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
293 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
295
296 // Expand float operations supported for scalars but not SIMD
297 for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
298 ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
299 for (auto T : {MVT::v4f32, MVT::v2f64})
301
302 // Unsigned comparison operations are unavailable for i64x2 vectors.
304 setCondCodeAction(CC, MVT::v2i64, Custom);
305
306 // 64x2 conversions are not in the spec
307 for (auto Op :
309 for (auto T : {MVT::v2i64, MVT::v2f64})
311
312 // But saturating fp_to_int conversions are
313 for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) {
314 setOperationAction(Op, MVT::v4i32, Custom);
315 if (Subtarget->hasFP16()) {
316 setOperationAction(Op, MVT::v8i16, Custom);
317 }
318 }
319
320 // Support vector extending
324 }
325
326 if (Subtarget->hasFP16()) {
327 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
328 }
329
330 if (Subtarget->hasRelaxedSIMD()) {
333 }
334
335 // Partial MLA reductions.
336 for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
337 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
338 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
339 }
340 }
341
342 // As a special case, these operators use the type to mean the type to
343 // sign-extend from.
345 if (!Subtarget->hasSignExt()) {
346 // Sign extends are legal only when extending a vector extract
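// (SIMD provides sign-extending lane extracts such as i8x16.extract_lane_s,
// whereas the scalar i32.extend8_s / i32.extend16_s forms require the
// sign-ext feature; hence Custom with SIMD and Expand otherwise.)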
347 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
348 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
350 }
353
354 // Dynamic stack allocation: use the default expansion.
355 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
356 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
357 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
358
362
363 // Expand these forms; we pattern-match the forms that we can handle in isel.
364 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
365 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
367
368 // We have custom switch handling.
369 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
370
371 // WebAssembly doesn't have:
372 // - Floating-point extending loads.
373 // - Floating-point truncating stores.
374 // - i1 extending loads.
375 // - truncating SIMD stores and most extending loads
376 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
377 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
378 for (auto T : MVT::integer_valuetypes())
379 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
380 setLoadExtAction(Ext, T, MVT::i1, Promote);
381 if (Subtarget->hasSIMD128()) {
382 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
383 MVT::v2f64}) {
384 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
385 if (MVT(T) != MemT) {
387 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
388 setLoadExtAction(Ext, T, MemT, Expand);
389 }
390 }
391 }
392 // But some vector extending loads are legal
393 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
394 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
395 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
396 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
397 }
398 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
399 }
400
401 // Don't do anything clever with build_pairs
403
404 // Trap lowers to wasm unreachable
405 setOperationAction(ISD::TRAP, MVT::Other, Legal);
406 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
407
408 // Exception handling intrinsics
412
414
415 // Always convert switches to br_tables unless there is only one case, which
416 // is equivalent to a simple branch. This reduces code size for wasm, and we
417 // defer possible jump table optimizations to the VM.
419}
420
429
438
440WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
441 // We have wasm instructions for these
442 switch (AI->getOperation()) {
450 default:
451 break;
452 }
454}
455
456bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
457 // Implementation copied from X86TargetLowering.
458 unsigned Opc = VecOp.getOpcode();
459
460 // Assume target opcodes can't be scalarized.
461 // TODO - do we have any exceptions?
463 return false;
464
465 // If the vector op is not supported, try to convert to scalar.
466 EVT VecVT = VecOp.getValueType();
468 return true;
469
470 // If the vector op is supported, but the scalar op is not, the transform may
471 // not be worthwhile.
472 EVT ScalarVT = VecVT.getScalarType();
473 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
474}
475
476FastISel *WebAssemblyTargetLowering::createFastISel(
477 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
478 return WebAssembly::createFastISel(FuncInfo, LibInfo);
479}
480
481MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
482 EVT VT) const {
483 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
484 if (BitWidth > 1 && BitWidth < 8)
485 BitWidth = 8;
486
487 if (BitWidth > 64) {
488 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
489 // the count to be an i32.
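// (For example, an i128 shift ends up as a compiler-rt call in the style of
// __ashlti3, whose shift-count parameter is a plain i32.)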
490 BitWidth = 32;
492 "32-bit shift counts ought to be enough for anyone");
493 }
494
497 "Unable to represent scalar shift amount type");
498 return Result;
499}
500
501// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
502// undefined result on invalid/overflow, to the WebAssembly opcode, which
503// traps on invalid/overflow.
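// Illustrative sketch (not the exact MIR emitted below) for the signed
// f32 -> i32 case:
//   %t  = fabs(%in)                      ; skipped for unsigned inputs
//   %ok = %t < 0x1p31                    ; unsigned also checks %in >= 0.0
//   %out = %ok ? i32.trunc_f32_s(%in)    ; do the trapping conversion
//              : INT32_MIN               ; substitute value (0 when unsigned)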
504static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
505 MachineBasicBlock *BB,
506 const TargetInstrInfo &TII,
507 bool IsUnsigned, bool Int64,
508 bool Float64, unsigned LoweredOpcode) {
510
511 Register OutReg = MI.getOperand(0).getReg();
512 Register InReg = MI.getOperand(1).getReg();
513
514 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
515 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
516 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
517 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
518 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
519 unsigned Eqz = WebAssembly::EQZ_I32;
520 unsigned And = WebAssembly::AND_I32;
521 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
522 int64_t Substitute = IsUnsigned ? 0 : Limit;
523 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
524 auto &Context = BB->getParent()->getFunction().getContext();
525 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
526
527 const BasicBlock *LLVMBB = BB->getBasicBlock();
528 MachineFunction *F = BB->getParent();
529 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
530 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
531 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
532
534 F->insert(It, FalseMBB);
535 F->insert(It, TrueMBB);
536 F->insert(It, DoneMBB);
537
538 // Transfer the remainder of BB and its successor edges to DoneMBB.
539 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
541
542 BB->addSuccessor(TrueMBB);
543 BB->addSuccessor(FalseMBB);
544 TrueMBB->addSuccessor(DoneMBB);
545 FalseMBB->addSuccessor(DoneMBB);
546
547 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
548 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
549 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
550 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
551 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
552 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
553 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
554
555 MI.eraseFromParent();
556 // For signed numbers, we can do a single comparison to determine whether
557 // fabs(x) is within range.
558 if (IsUnsigned) {
559 Tmp0 = InReg;
560 } else {
561 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
562 }
563 BuildMI(BB, DL, TII.get(FConst), Tmp1)
564 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
565 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
566
567 // For unsigned numbers, we have to do a separate comparison with zero.
568 if (IsUnsigned) {
569 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
570 Register SecondCmpReg =
571 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
572 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
573 BuildMI(BB, DL, TII.get(FConst), Tmp1)
574 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
575 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
576 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
577 CmpReg = AndReg;
578 }
579
580 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
581
582 // Create the CFG diamond to select between doing the conversion or using
583 // the substitute value.
584 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
585 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
586 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
587 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
588 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
589 .addReg(FalseReg)
590 .addMBB(FalseMBB)
591 .addReg(TrueReg)
592 .addMBB(TrueMBB);
593
594 return DoneMBB;
595}
596
597// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
598// instruction to handle the zero-length case.
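// Illustrative shape of the emitted triangle when the length is not a known
// constant:
//   br_if done, (Len == 0)     ; skip the copy for zero-length requests
//   memory.copy DstMem, SrcMem, Dst, Src, Len
// done: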
599static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
600 MachineBasicBlock *BB,
601 const TargetInstrInfo &TII, bool Int64) {
603
604 MachineOperand DstMem = MI.getOperand(0);
605 MachineOperand SrcMem = MI.getOperand(1);
606 MachineOperand Dst = MI.getOperand(2);
607 MachineOperand Src = MI.getOperand(3);
608 MachineOperand Len = MI.getOperand(4);
609
610 // If the length is a constant, we don't actually need the check.
611 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
612 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
613 Def->getOpcode() == WebAssembly::CONST_I64) {
614 if (Def->getOperand(1).getImm() == 0) {
615 // A zero-length memcpy is a no-op.
616 MI.eraseFromParent();
617 return BB;
618 }
619 // A non-zero-length memcpy doesn't need a zero check.
620 unsigned MemoryCopy =
621 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
622 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
623 .add(DstMem)
624 .add(SrcMem)
625 .add(Dst)
626 .add(Src)
627 .add(Len);
628 MI.eraseFromParent();
629 return BB;
630 }
631 }
632
633 // We're going to add an extra use to `Len` to test if it's zero; that
634 // use shouldn't be a kill, even if the original use is.
635 MachineOperand NoKillLen = Len;
636 NoKillLen.setIsKill(false);
637
638 // Decide on which `MachineInstr` opcode we're going to use.
639 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
640 unsigned MemoryCopy =
641 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
642
643 // Create two new basic blocks; one for the new `memory.copy` that we can
644 // branch over, and one for the rest of the instructions after the original
645 // `memory.copy`.
646 const BasicBlock *LLVMBB = BB->getBasicBlock();
647 MachineFunction *F = BB->getParent();
648 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
649 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
650
652 F->insert(It, TrueMBB);
653 F->insert(It, DoneMBB);
654
655 // Transfer the remainder of BB and its successor edges to DoneMBB.
656 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
658
659 // Connect the CFG edges.
660 BB->addSuccessor(TrueMBB);
661 BB->addSuccessor(DoneMBB);
662 TrueMBB->addSuccessor(DoneMBB);
663
664 // Create a virtual register for the `Eqz` result.
665 unsigned EqzReg;
666 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
667
668 // Erase the original `memory.copy`.
669 MI.eraseFromParent();
670
671 // Test if `Len` is zero.
672 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
673
674 // Insert a new `memory.copy`.
675 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
676 .add(DstMem)
677 .add(SrcMem)
678 .add(Dst)
679 .add(Src)
680 .add(Len);
681
682 // Create the CFG triangle.
683 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
684 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
685
686 return DoneMBB;
687}
688
689// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
690// instruction to handle the zero-length case.
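// The structure mirrors LowerMemcpy above: unless `Len` is a known constant,
// branch around a single `memory.fill` when `Len` is zero.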
691static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
692 MachineBasicBlock *BB,
693 const TargetInstrInfo &TII, bool Int64) {
695
696 MachineOperand Mem = MI.getOperand(0);
697 MachineOperand Dst = MI.getOperand(1);
698 MachineOperand Val = MI.getOperand(2);
699 MachineOperand Len = MI.getOperand(3);
700
701 // If the length is a constant, we don't actually need the check.
702 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
703 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
704 Def->getOpcode() == WebAssembly::CONST_I64) {
705 if (Def->getOperand(1).getImm() == 0) {
706 // A zero-length memset is a no-op.
707 MI.eraseFromParent();
708 return BB;
709 }
710 // A non-zero-length memset doesn't need a zero check.
711 unsigned MemoryFill =
712 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
713 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
714 .add(Mem)
715 .add(Dst)
716 .add(Val)
717 .add(Len);
718 MI.eraseFromParent();
719 return BB;
720 }
721 }
722
723 // We're going to add an extra use to `Len` to test if it's zero; that
724 // use shouldn't be a kill, even if the original use is.
725 MachineOperand NoKillLen = Len;
726 NoKillLen.setIsKill(false);
727
728 // Decide on which `MachineInstr` opcode we're going to use.
729 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
730 unsigned MemoryFill =
731 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
732
733 // Create two new basic blocks; one for the new `memory.fill` that we can
734 // branch over, and one for the rest of the instructions after the original
735 // `memory.fill`.
736 const BasicBlock *LLVMBB = BB->getBasicBlock();
737 MachineFunction *F = BB->getParent();
738 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
739 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
740
742 F->insert(It, TrueMBB);
743 F->insert(It, DoneMBB);
744
745 // Transfer the remainder of BB and its successor edges to DoneMBB.
746 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
748
749 // Connect the CFG edges.
750 BB->addSuccessor(TrueMBB);
751 BB->addSuccessor(DoneMBB);
752 TrueMBB->addSuccessor(DoneMBB);
753
754 // Create a virtual register for the `Eqz` result.
755 unsigned EqzReg;
756 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
757
758 // Erase the original `memory.fill`.
759 MI.eraseFromParent();
760
761 // Test if `Len` is zero.
762 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
763
764 // Insert a new `memory.fill`.
765 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
766
767 // Create the CFG triangle.
768 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
769 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
770
771 return DoneMBB;
772}
773
774static MachineBasicBlock *
775LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
776 const WebAssemblySubtarget *Subtarget,
777 const TargetInstrInfo &TII) {
778 MachineInstr &CallParams = *CallResults.getPrevNode();
779 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
780 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
781 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
782
783 bool IsIndirect =
784 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
785 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
786
787 bool IsFuncrefCall = false;
788 if (IsIndirect && CallParams.getOperand(0).isReg()) {
789 Register Reg = CallParams.getOperand(0).getReg();
790 const MachineFunction *MF = BB->getParent();
791 const MachineRegisterInfo &MRI = MF->getRegInfo();
792 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
793 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
794 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
795 }
796
797 unsigned CallOp;
798 if (IsIndirect && IsRetCall) {
799 CallOp = WebAssembly::RET_CALL_INDIRECT;
800 } else if (IsIndirect) {
801 CallOp = WebAssembly::CALL_INDIRECT;
802 } else if (IsRetCall) {
803 CallOp = WebAssembly::RET_CALL;
804 } else {
805 CallOp = WebAssembly::CALL;
806 }
807
808 MachineFunction &MF = *BB->getParent();
809 const MCInstrDesc &MCID = TII.get(CallOp);
810 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
811
812 // Move the function pointer to the end of the arguments for indirect calls
813 if (IsIndirect) {
814 auto FnPtr = CallParams.getOperand(0);
815 CallParams.removeOperand(0);
816
817 // For funcrefs, call_indirect is done through __funcref_call_table and the
818 // funcref is always installed in slot 0 of the table; therefore, instead of
819 // appending the function pointer to the end of the params list, a zero
820 // (the index into __funcref_call_table) is added.
822 if (IsFuncrefCall) {
823 Register RegZero =
824 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
825 MachineInstrBuilder MIBC0 =
826 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
827
828 BB->insert(CallResults.getIterator(), MIBC0);
829 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
830 } else
831 CallParams.addOperand(FnPtr);
832 }
833
834 for (auto Def : CallResults.defs())
835 MIB.add(Def);
836
837 if (IsIndirect) {
838 // Placeholder for the type index.
839 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
840 MIB.addImm(0);
841 // The table into which this call_indirect indexes.
842 MCSymbolWasm *Table = IsFuncrefCall
844 MF.getContext(), Subtarget)
846 MF.getContext(), Subtarget);
847 if (Subtarget->hasCallIndirectOverlong()) {
848 MIB.addSym(Table);
849 } else {
850 // For the MVP there is at most one table whose number is 0, but we can't
851 // write a table symbol or issue relocations. Instead we just ensure the
852 // table is live and write a zero.
853 Table->setNoStrip();
854 MIB.addImm(0);
855 }
856 }
857
858 for (auto Use : CallParams.uses())
859 MIB.add(Use);
860
861 BB->insert(CallResults.getIterator(), MIB);
862 CallParams.eraseFromParent();
863 CallResults.eraseFromParent();
864
865 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
866 // table slot with ref.null upon call_indirect return.
867 //
868 // This generates the following code, which comes right after a call_indirect
869 // of a funcref:
870 //
871 // i32.const 0
872 // ref.null func
873 // table.set __funcref_call_table
874 if (IsIndirect && IsFuncrefCall) {
876 MF.getContext(), Subtarget);
877 Register RegZero =
878 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
879 MachineInstr *Const0 =
880 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
881 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
882
883 Register RegFuncref =
884 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
885 MachineInstr *RefNull =
886 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
887 BB->insertAfter(Const0->getIterator(), RefNull);
888
889 MachineInstr *TableSet =
890 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
891 .addSym(Table)
892 .addReg(RegZero)
893 .addReg(RegFuncref);
894 BB->insertAfter(RefNull->getIterator(), TableSet);
895 }
896
897 return BB;
898}
899
900MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
901 MachineInstr &MI, MachineBasicBlock *BB) const {
902 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
903 DebugLoc DL = MI.getDebugLoc();
904
905 switch (MI.getOpcode()) {
906 default:
907 llvm_unreachable("Unexpected instr type to insert");
908 case WebAssembly::FP_TO_SINT_I32_F32:
909 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
910 WebAssembly::I32_TRUNC_S_F32);
911 case WebAssembly::FP_TO_UINT_I32_F32:
912 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
913 WebAssembly::I32_TRUNC_U_F32);
914 case WebAssembly::FP_TO_SINT_I64_F32:
915 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
916 WebAssembly::I64_TRUNC_S_F32);
917 case WebAssembly::FP_TO_UINT_I64_F32:
918 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
919 WebAssembly::I64_TRUNC_U_F32);
920 case WebAssembly::FP_TO_SINT_I32_F64:
921 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
922 WebAssembly::I32_TRUNC_S_F64);
923 case WebAssembly::FP_TO_UINT_I32_F64:
924 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
925 WebAssembly::I32_TRUNC_U_F64);
926 case WebAssembly::FP_TO_SINT_I64_F64:
927 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
928 WebAssembly::I64_TRUNC_S_F64);
929 case WebAssembly::FP_TO_UINT_I64_F64:
930 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
931 WebAssembly::I64_TRUNC_U_F64);
932 case WebAssembly::MEMCPY_A32:
933 return LowerMemcpy(MI, DL, BB, TII, false);
934 case WebAssembly::MEMCPY_A64:
935 return LowerMemcpy(MI, DL, BB, TII, true);
936 case WebAssembly::MEMSET_A32:
937 return LowerMemset(MI, DL, BB, TII, false);
938 case WebAssembly::MEMSET_A64:
939 return LowerMemset(MI, DL, BB, TII, true);
940 case WebAssembly::CALL_RESULTS:
941 case WebAssembly::RET_CALL_RESULTS:
942 return LowerCallResults(MI, DL, BB, Subtarget, TII);
943 }
944}
945
946std::pair<unsigned, const TargetRegisterClass *>
947WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
948 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
949 // First, see if this is a constraint that directly corresponds to a
950 // WebAssembly register class.
951 if (Constraint.size() == 1) {
952 switch (Constraint[0]) {
953 case 'r':
954 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
955 if (Subtarget->hasSIMD128() && VT.isVector()) {
956 if (VT.getSizeInBits() == 128)
957 return std::make_pair(0U, &WebAssembly::V128RegClass);
958 }
959 if (VT.isInteger() && !VT.isVector()) {
960 if (VT.getSizeInBits() <= 32)
961 return std::make_pair(0U, &WebAssembly::I32RegClass);
962 if (VT.getSizeInBits() <= 64)
963 return std::make_pair(0U, &WebAssembly::I64RegClass);
964 }
965 if (VT.isFloatingPoint() && !VT.isVector()) {
966 switch (VT.getSizeInBits()) {
967 case 32:
968 return std::make_pair(0U, &WebAssembly::F32RegClass);
969 case 64:
970 return std::make_pair(0U, &WebAssembly::F64RegClass);
971 default:
972 break;
973 }
974 }
975 break;
976 default:
977 break;
978 }
979 }
980
982}
983
984bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
985 // Assume ctz is a relatively cheap operation.
986 return true;
987}
988
989bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
990 // Assume clz is a relatively cheap operation.
991 return true;
992}
993
994bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
995 const AddrMode &AM,
996 Type *Ty, unsigned AS,
997 Instruction *I) const {
998 // WebAssembly offsets are added as unsigned without wrapping. The
999 // isLegalAddressingMode hook gives us no way to determine whether wrapping
1000 // could happen, so we approximate this by accepting only non-negative offsets.
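// For example, `base + 16` is accepted, while `base - 8` (negative offset) and
// `base + 4*index` (scaled index) are rejected by the checks below.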
1001 if (AM.BaseOffs < 0)
1002 return false;
1003
1004 // WebAssembly has no scale register operands.
1005 if (AM.Scale != 0)
1006 return false;
1007
1008 // Everything else is legal.
1009 return true;
1010}
1011
1012bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1013 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1014 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1015 // WebAssembly supports unaligned accesses, though loads and stores that
1016 // perform them should be declared with the p2align attribute, and there
1017 // may be a performance impact. We tell LLVM they're "fast" because,
1018 // for the kinds of things LLVM uses this for (merging adjacent stores
1019 // of constants, etc.), WebAssembly implementations will either want the
1020 // unaligned access or will split it anyway.
1021 if (Fast)
1022 *Fast = 1;
1023 return true;
1024}
1025
1026bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1027 AttributeList Attr) const {
1028 // The current thinking is that wasm engines will perform this optimization,
1029 // so we can save on code size.
1030 return true;
1031}
1032
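// Extending vector loads are reported as desirable only for the shapes the
// constructor marks Legal via setLoadExtAction: v8i8->v8i16, v4i16->v4i32,
// and v2i32->v2i64.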
1033bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1034 EVT ExtT = ExtVal.getValueType();
1035 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1036 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1037 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1038 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1039}
1040
1041bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1042 const GlobalAddressSDNode *GA) const {
1043 // Wasm doesn't support function addresses with offsets
1044 const GlobalValue *GV = GA->getGlobal();
1046}
1047
1048EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1049 LLVMContext &C,
1050 EVT VT) const {
1051 if (VT.isVector())
1053
1054 // So far, all branch instructions in Wasm take an I32 condition.
1055 // The default TargetLowering::getSetCCResultType returns the pointer size,
1056 // which would be useful to reduce instruction counts when testing
1057 // against 64-bit pointers/values if at some point Wasm supports that.
1058 return EVT::getIntegerVT(C, 32);
1059}
1060
1061bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1062 const CallInst &I,
1063 MachineFunction &MF,
1064 unsigned Intrinsic) const {
1065 switch (Intrinsic) {
1066 case Intrinsic::wasm_memory_atomic_notify:
1068 Info.memVT = MVT::i32;
1069 Info.ptrVal = I.getArgOperand(0);
1070 Info.offset = 0;
1071 Info.align = Align(4);
1072 // The atomic.notify instruction does not really load the memory specified
1073 // by this argument, but a MachineMemOperand must be either a load or a
1074 // store, so we set this to a load.
1075 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1076 // instructions are treated as volatiles in the backend, so we should be
1077 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1079 return true;
1080 case Intrinsic::wasm_memory_atomic_wait32:
1082 Info.memVT = MVT::i32;
1083 Info.ptrVal = I.getArgOperand(0);
1084 Info.offset = 0;
1085 Info.align = Align(4);
1087 return true;
1088 case Intrinsic::wasm_memory_atomic_wait64:
1090 Info.memVT = MVT::i64;
1091 Info.ptrVal = I.getArgOperand(0);
1092 Info.offset = 0;
1093 Info.align = Align(8);
1095 return true;
1096 case Intrinsic::wasm_loadf16_f32:
1098 Info.memVT = MVT::f16;
1099 Info.ptrVal = I.getArgOperand(0);
1100 Info.offset = 0;
1101 Info.align = Align(2);
1103 return true;
1104 case Intrinsic::wasm_storef16_f32:
1106 Info.memVT = MVT::f16;
1107 Info.ptrVal = I.getArgOperand(1);
1108 Info.offset = 0;
1109 Info.align = Align(2);
1111 return true;
1112 default:
1113 return false;
1114 }
1115}
1116
1117void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1118 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1119 const SelectionDAG &DAG, unsigned Depth) const {
1120 switch (Op.getOpcode()) {
1121 default:
1122 break;
1124 unsigned IntNo = Op.getConstantOperandVal(0);
1125 switch (IntNo) {
1126 default:
1127 break;
1128 case Intrinsic::wasm_bitmask: {
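// wasm_bitmask produces one result bit per vector lane, so every bit above
// the lane count is known to be zero; record that in the known-zero mask.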
1129 unsigned BitWidth = Known.getBitWidth();
1130 EVT VT = Op.getOperand(1).getSimpleValueType();
1131 unsigned PossibleBits = VT.getVectorNumElements();
1132 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1133 Known.Zero |= ZeroMask;
1134 break;
1135 }
1136 }
1137 break;
1138 }
1139
1140 // For 128-bit addition, if the upper halves of both inputs are zero, then all
1141 // bits of the upper half of the result are known to be zero except possibly
1142 // the lowest bit (the carry out of the low half).
1143 case WebAssemblyISD::I64_ADD128:
1144 if (Op.getResNo() == 1) {
1145 SDValue LHS_HI = Op.getOperand(1);
1146 SDValue RHS_HI = Op.getOperand(3);
1147 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1148 Known.Zero.setBitsFrom(1);
1149 }
1150 break;
1151 }
1152}
1153
1155WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1156 if (VT.isFixedLengthVector()) {
1157 MVT EltVT = VT.getVectorElementType();
1158 // We have legal vector types with these lane types, so widening the
1159 // vector would let us use some of the lanes directly without having to
1160 // extend or truncate values.
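// For example, a v2i32 value is widened to v4i32 rather than being split or
// promoted.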
1161 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1162 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1163 return TypeWidenVector;
1164 }
1165
1167}
1168
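// FMA formation is only reported as profitable for v8f16, matching the
// constructor above, which marks ISD::FMA Legal solely for MVT::v8f16 when
// the FP16 feature is enabled.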
1169bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1170 const MachineFunction &MF, EVT VT) const {
1171 if (!Subtarget->hasFP16() || !VT.isVector())
1172 return false;
1173
1174 EVT ScalarVT = VT.getScalarType();
1175 if (!ScalarVT.isSimple())
1176 return false;
1177
1178 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1179}
1180
1181bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1182 SDValue Op, const TargetLoweringOpt &TLO) const {
1183 // The ISel process runs DAGCombiner after legalization; this step is called
1184 // the SelectionDAG optimization phase. This post-legalization combining
1185 // process runs DAGCombiner on each node, and if a change was made, re-runs
1186 // legalization on that node and its users to make sure everything is in a
1187 // legalized state.
1188 //
1189 // The legalization calls lowering routines, and we do our custom lowering for
1190 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1191 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1192 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1193 // turns unused vector elements into undefs. But this routine does not work
1194 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1196 // combination can result in an infinite loop, in which undefs are converted to
1196 // zeros in legalization and back to undefs in combining.
1197 //
1198 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1199 // running for build_vectors.
1200 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1201 return false;
1202 return true;
1203}
1204
1205//===----------------------------------------------------------------------===//
1206// WebAssembly Lowering private implementation.
1207//===----------------------------------------------------------------------===//
1208
1209//===----------------------------------------------------------------------===//
1210// Lowering Code
1211//===----------------------------------------------------------------------===//
1212
1213static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1214 MachineFunction &MF = DAG.getMachineFunction();
1215 DAG.getContext()->diagnose(
1216 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1217}
1218
1219// Test whether the given calling convention is supported.
1220static bool callingConvSupported(CallingConv::ID CallConv) {
1221 // We currently support the language-independent target-independent
1222 // conventions. We don't yet have a way to annotate calls with properties like
1223 // "cold", and we don't have any call-clobbered registers, so these are mostly
1224 // all handled the same.
1225 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1226 CallConv == CallingConv::Cold ||
1227 CallConv == CallingConv::PreserveMost ||
1228 CallConv == CallingConv::PreserveAll ||
1229 CallConv == CallingConv::CXX_FAST_TLS ||
1231 CallConv == CallingConv::Swift;
1232}
1233
1234SDValue
1235WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1236 SmallVectorImpl<SDValue> &InVals) const {
1237 SelectionDAG &DAG = CLI.DAG;
1238 SDLoc DL = CLI.DL;
1239 SDValue Chain = CLI.Chain;
1240 SDValue Callee = CLI.Callee;
1241 MachineFunction &MF = DAG.getMachineFunction();
1242 auto Layout = MF.getDataLayout();
1243
1244 CallingConv::ID CallConv = CLI.CallConv;
1245 if (!callingConvSupported(CallConv))
1246 fail(DL, DAG,
1247 "WebAssembly doesn't support language-specific or target-specific "
1248 "calling conventions yet");
1249 if (CLI.IsPatchPoint)
1250 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1251
1252 if (CLI.IsTailCall) {
1253 auto NoTail = [&](const char *Msg) {
1254 if (CLI.CB && CLI.CB->isMustTailCall())
1255 fail(DL, DAG, Msg);
1256 CLI.IsTailCall = false;
1257 };
1258
1259 if (!Subtarget->hasTailCall())
1260 NoTail("WebAssembly 'tail-call' feature not enabled");
1261
1262 // Varargs calls cannot be tail calls because the buffer is on the stack
1263 if (CLI.IsVarArg)
1264 NoTail("WebAssembly does not support varargs tail calls");
1265
1266 // Do not tail call unless caller and callee return types match
1267 const Function &F = MF.getFunction();
1268 const TargetMachine &TM = getTargetMachine();
1269 Type *RetTy = F.getReturnType();
1270 SmallVector<MVT, 4> CallerRetTys;
1271 SmallVector<MVT, 4> CalleeRetTys;
1272 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1273 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1274 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1275 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1276 CalleeRetTys.begin());
1277 if (!TypesMatch)
1278 NoTail("WebAssembly tail call requires caller and callee return types to "
1279 "match");
1280
1281 // If pointers to local stack values are passed, we cannot tail call
1282 if (CLI.CB) {
1283 for (auto &Arg : CLI.CB->args()) {
1284 Value *Val = Arg.get();
1285 // Trace the value back through pointer operations
1286 while (true) {
1287 Value *Src = Val->stripPointerCastsAndAliases();
1288 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1289 Src = GEP->getPointerOperand();
1290 if (Val == Src)
1291 break;
1292 Val = Src;
1293 }
1294 if (isa<AllocaInst>(Val)) {
1295 NoTail(
1296 "WebAssembly does not support tail calling with stack arguments");
1297 break;
1298 }
1299 }
1300 }
1301 }
1302
1303 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1304 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1305 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1306
1307 // The generic code may have added an sret argument. If we're lowering an
1308 // invoke function, the ABI requires that the function pointer be the first
1309 // argument, so we may have to swap the arguments.
1310 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1311 Outs[0].Flags.isSRet()) {
1312 std::swap(Outs[0], Outs[1]);
1313 std::swap(OutVals[0], OutVals[1]);
1314 }
1315
1316 bool HasSwiftSelfArg = false;
1317 bool HasSwiftErrorArg = false;
1318 unsigned NumFixedArgs = 0;
1319 for (unsigned I = 0; I < Outs.size(); ++I) {
1320 const ISD::OutputArg &Out = Outs[I];
1321 SDValue &OutVal = OutVals[I];
1322 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1323 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1324 if (Out.Flags.isNest())
1325 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1326 if (Out.Flags.isInAlloca())
1327 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1328 if (Out.Flags.isInConsecutiveRegs())
1329 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1331 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1332 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1333 auto &MFI = MF.getFrameInfo();
1334 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1336 /*isSS=*/false);
1337 SDValue SizeNode =
1338 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1339 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1340 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1342 /*isVolatile*/ false, /*AlwaysInline=*/false,
1343 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1344 MachinePointerInfo());
1345 OutVal = FINode;
1346 }
1347 // Count the number of fixed args *after* legalization.
1348 NumFixedArgs += !Out.Flags.isVarArg();
1349 }
1350
1351 bool IsVarArg = CLI.IsVarArg;
1352 auto PtrVT = getPointerTy(Layout);
1353
1354 // For swiftcc, emit additional swiftself and swifterror arguments if they
1355 // aren't already present. These additional arguments are also added to the
1356 // callee signature. They are necessary to match the callee and caller
1357 // signatures for indirect calls.
1358 if (CallConv == CallingConv::Swift) {
1359 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1360 if (!HasSwiftSelfArg) {
1361 NumFixedArgs++;
1362 ISD::ArgFlagsTy Flags;
1363 Flags.setSwiftSelf();
1364 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1365 CLI.Outs.push_back(Arg);
1366 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1367 CLI.OutVals.push_back(ArgVal);
1368 }
1369 if (!HasSwiftErrorArg) {
1370 NumFixedArgs++;
1371 ISD::ArgFlagsTy Flags;
1372 Flags.setSwiftError();
1373 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1374 CLI.Outs.push_back(Arg);
1375 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1376 CLI.OutVals.push_back(ArgVal);
1377 }
1378 }
1379
1380 // Analyze operands of the call, assigning locations to each operand.
1382 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1383
1384 if (IsVarArg) {
1385 // Outgoing non-fixed arguments are placed in a buffer. First
1386 // compute their offsets and the total amount of buffer space needed.
1387 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1388 const ISD::OutputArg &Out = Outs[I];
1389 SDValue &Arg = OutVals[I];
1390 EVT VT = Arg.getValueType();
1391 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1392 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1393 Align Alignment =
1394 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1395 unsigned Offset =
1396 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1397 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1398 Offset, VT.getSimpleVT(),
1400 }
1401 }
1402
1403 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1404
1405 SDValue FINode;
1406 if (IsVarArg && NumBytes) {
1407 // For non-fixed arguments, next emit stores to store the argument values
1408 // to the stack buffer at the offsets computed above.
1409 MaybeAlign StackAlign = Layout.getStackAlignment();
1410 assert(StackAlign && "data layout string is missing stack alignment");
1411 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1412 /*isSS=*/false);
1413 unsigned ValNo = 0;
1415 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1416 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1417 "ArgLocs should remain in order and only hold varargs args");
1418 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1419 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1420 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1421 DAG.getConstant(Offset, DL, PtrVT));
1422 Chains.push_back(
1423 DAG.getStore(Chain, DL, Arg, Add,
1425 }
1426 if (!Chains.empty())
1427 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1428 } else if (IsVarArg) {
1429 FINode = DAG.getIntPtrConstant(0, DL);
1430 }
1431
1432 if (Callee->getOpcode() == ISD::GlobalAddress) {
1433 // If the callee is a GlobalAddress node (quite common, since every direct
1434 // call is), turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1435 // doesn't add MO_GOT, which is not needed for direct calls.
1436 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1439 GA->getOffset());
1440 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1441 getPointerTy(DAG.getDataLayout()), Callee);
1442 }
1443
1444 // Compute the operands for the CALLn node.
1446 Ops.push_back(Chain);
1447 Ops.push_back(Callee);
1448
1449 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1450 // isn't reliable.
1451 Ops.append(OutVals.begin(),
1452 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1453 // Add a pointer to the vararg buffer.
1454 if (IsVarArg)
1455 Ops.push_back(FINode);
1456
1457 SmallVector<EVT, 8> InTys;
1458 for (const auto &In : Ins) {
1459 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1460 assert(!In.Flags.isNest() && "nest is not valid for return values");
1461 if (In.Flags.isInAlloca())
1462 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1463 if (In.Flags.isInConsecutiveRegs())
1464 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1465 if (In.Flags.isInConsecutiveRegsLast())
1466 fail(DL, DAG,
1467 "WebAssembly hasn't implemented cons regs last return values");
1468 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1469 // registers.
1470 InTys.push_back(In.VT);
1471 }
1472
1473 // Lastly, if this is a call to a funcref, we need to add a table.set
1474 // instruction to the chain and transform the call.
1476 CLI.CB->getCalledOperand()->getType())) {
1477 // In the absence of the function-references proposal (where a funcref call
1478 // would be lowered to call_ref), with reference types we instead generate a
1479 // table.set that installs the funcref into a special table used solely for
1480 // this purpose, followed by a call_indirect. Here we just generate the
1481 // table.set and return its SDValue so that LowerCall can finalize the
1482 // lowering by generating the call_indirect.
1483 SDValue Chain = Ops[0];
1484
1486 MF.getContext(), Subtarget);
1487 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1488 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1489 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1490 SDValue TableSet = DAG.getMemIntrinsicNode(
1491 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1492 MVT::funcref,
1493 // Machine Mem Operand args
1494 MachinePointerInfo(
1496 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1498
1499 Ops[0] = TableSet; // The new chain is the TableSet itself
1500 }
1501
1502 if (CLI.IsTailCall) {
1503 // ret_calls do not return values to the current frame
1504 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1505 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1506 }
1507
1508 InTys.push_back(MVT::Other);
1509 SDVTList InTyList = DAG.getVTList(InTys);
1510 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1511
1512 for (size_t I = 0; I < Ins.size(); ++I)
1513 InVals.push_back(Res.getValue(I));
1514
1515 // Return the chain
1516 return Res.getValue(Ins.size());
1517}
1518
1519bool WebAssemblyTargetLowering::CanLowerReturn(
1520 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1521 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1522 const Type *RetTy) const {
1523 // WebAssembly can only handle returning tuples with multivalue enabled
1524 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1525}
1526
1527SDValue WebAssemblyTargetLowering::LowerReturn(
1528 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1530 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1531 SelectionDAG &DAG) const {
1532 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1533 "MVP WebAssembly can only return up to one value");
1534 if (!callingConvSupported(CallConv))
1535 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1536
1537 SmallVector<SDValue, 4> RetOps(1, Chain);
1538 RetOps.append(OutVals.begin(), OutVals.end());
1539 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1540
1541 // Record the number and types of the return values.
1542 for (const ISD::OutputArg &Out : Outs) {
1543 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1544 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1545 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1546 if (Out.Flags.isInAlloca())
1547 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1548 if (Out.Flags.isInConsecutiveRegs())
1549 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1551 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1552 }
1553
1554 return Chain;
1555}
1556
1557SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1558 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1559 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1560 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1561 if (!callingConvSupported(CallConv))
1562 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1563
1564 MachineFunction &MF = DAG.getMachineFunction();
1565 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1566
1567 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1568 // of the incoming values before they're represented by virtual registers.
1569 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1570
1571 bool HasSwiftErrorArg = false;
1572 bool HasSwiftSelfArg = false;
1573 for (const ISD::InputArg &In : Ins) {
1574 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1575 HasSwiftErrorArg |= In.Flags.isSwiftError();
1576 if (In.Flags.isInAlloca())
1577 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1578 if (In.Flags.isNest())
1579 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1580 if (In.Flags.isInConsecutiveRegs())
1581 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1582 if (In.Flags.isInConsecutiveRegsLast())
1583 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1584 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1585 // registers.
1586 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1587 DAG.getTargetConstant(InVals.size(),
1588 DL, MVT::i32))
1589 : DAG.getUNDEF(In.VT));
1590
1591 // Record the number and types of arguments.
1592 MFI->addParam(In.VT);
1593 }
1594
1595 // For swiftcc, emit additional swiftself and swifterror arguments if they
1596 // aren't already present. These additional arguments are also added to the
1597 // callee signature. They are necessary to match the callee and caller
1598 // signatures for indirect calls.
1599 auto PtrVT = getPointerTy(MF.getDataLayout());
1600 if (CallConv == CallingConv::Swift) {
1601 if (!HasSwiftSelfArg) {
1602 MFI->addParam(PtrVT);
1603 }
1604 if (!HasSwiftErrorArg) {
1605 MFI->addParam(PtrVT);
1606 }
1607 }
1608 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1609 // the buffer is passed as an argument.
1610 if (IsVarArg) {
1611 MVT PtrVT = getPointerTy(MF.getDataLayout());
1612 Register VarargVreg =
1614 MFI->setVarargBufferVreg(VarargVreg);
1615 Chain = DAG.getCopyToReg(
1616 Chain, DL, VarargVreg,
1617 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1618 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1619 MFI->addParam(PtrVT);
1620 }
1621
1622 // Record the number and types of arguments and results.
1623 SmallVector<MVT, 4> Params;
1626 MF.getFunction(), DAG.getTarget(), Params, Results);
1627 for (MVT VT : Results)
1628 MFI->addResult(VT);
1629 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1630 // the param logic here with ComputeSignatureVTs
1631 assert(MFI->getParams().size() == Params.size() &&
1632 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1633 Params.begin()));
1634
1635 return Chain;
1636}
1637
1638void WebAssemblyTargetLowering::ReplaceNodeResults(
1640 switch (N->getOpcode()) {
1642 // Do not add any results, signifying that N should not be custom lowered
1643 // after all. This happens because simd128 turns on custom lowering for
1644 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1645 // illegal type.
1646 break;
1649 // Do not add any results, signifying that N should not be custom lowered.
1650 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1651 break;
1652 case ISD::ADD:
1653 case ISD::SUB:
1654 Results.push_back(Replace128Op(N, DAG));
1655 break;
1656 default:
1658 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1659 }
1660}
1661
1662//===----------------------------------------------------------------------===//
1663// Custom lowering hooks.
1664//===----------------------------------------------------------------------===//
1665
1666SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1667 SelectionDAG &DAG) const {
1668 SDLoc DL(Op);
1669 switch (Op.getOpcode()) {
1670 default:
1671 llvm_unreachable("unimplemented operation lowering");
1672 return SDValue();
1673 case ISD::FrameIndex:
1674 return LowerFrameIndex(Op, DAG);
1675 case ISD::GlobalAddress:
1676 return LowerGlobalAddress(Op, DAG);
1677  case ISD::GlobalTLSAddress:
1678    return LowerGlobalTLSAddress(Op, DAG);
1679  case ISD::ExternalSymbol:
1680    return LowerExternalSymbol(Op, DAG);
1681 case ISD::JumpTable:
1682 return LowerJumpTable(Op, DAG);
1683 case ISD::BR_JT:
1684 return LowerBR_JT(Op, DAG);
1685 case ISD::VASTART:
1686 return LowerVASTART(Op, DAG);
1687 case ISD::BlockAddress:
1688 case ISD::BRIND:
1689 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1690 return SDValue();
1691 case ISD::RETURNADDR:
1692 return LowerRETURNADDR(Op, DAG);
1693 case ISD::FRAMEADDR:
1694 return LowerFRAMEADDR(Op, DAG);
1695 case ISD::CopyToReg:
1696 return LowerCopyToReg(Op, DAG);
1697  case ISD::EXTRACT_VECTOR_ELT:
1698  case ISD::INSERT_VECTOR_ELT:
1699    return LowerAccessVectorElement(Op, DAG);
1700  case ISD::INTRINSIC_VOID:
1701  case ISD::INTRINSIC_WO_CHAIN:
1702  case ISD::INTRINSIC_W_CHAIN:
1703    return LowerIntrinsic(Op, DAG);
1704  case ISD::SIGN_EXTEND_INREG:
1705    return LowerSIGN_EXTEND_INREG(Op, DAG);
1706  case ISD::ZERO_EXTEND_VECTOR_INREG:
1707  case ISD::SIGN_EXTEND_VECTOR_INREG:
1708    return LowerEXTEND_VECTOR_INREG(Op, DAG);
1709 case ISD::BUILD_VECTOR:
1710 return LowerBUILD_VECTOR(Op, DAG);
1711  case ISD::VECTOR_SHUFFLE:
1712    return LowerVECTOR_SHUFFLE(Op, DAG);
1713 case ISD::SETCC:
1714 return LowerSETCC(Op, DAG);
1715 case ISD::SHL:
1716 case ISD::SRA:
1717 case ISD::SRL:
1718 return LowerShift(Op, DAG);
1719  case ISD::FP_TO_SINT_SAT:
1720  case ISD::FP_TO_UINT_SAT:
1721    return LowerFP_TO_INT_SAT(Op, DAG);
1722 case ISD::LOAD:
1723 return LowerLoad(Op, DAG);
1724 case ISD::STORE:
1725 return LowerStore(Op, DAG);
1726 case ISD::CTPOP:
1727 case ISD::CTLZ:
1728 case ISD::CTTZ:
1729 return DAG.UnrollVectorOp(Op.getNode());
1730 case ISD::CLEAR_CACHE:
1731 report_fatal_error("llvm.clear_cache is not supported on wasm");
1732 case ISD::SMUL_LOHI:
1733 case ISD::UMUL_LOHI:
1734 return LowerMUL_LOHI(Op, DAG);
1735 case ISD::UADDO:
1736 return LowerUADDO(Op, DAG);
1737 }
1738}
1739
1740static bool IsWebAssemblyGlobal(SDValue Op) {
1741  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1742    return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1743
1744 return false;
1745}
1746
1747static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1748 SelectionDAG &DAG) {
1749  const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
1750  if (!FI)
1751 return std::nullopt;
1752
1753 auto &MF = DAG.getMachineFunction();
1754  return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1755}
1756
1757SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1758 SelectionDAG &DAG) const {
1759 SDLoc DL(Op);
1760 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1761 const SDValue &Value = SN->getValue();
1762 const SDValue &Base = SN->getBasePtr();
1763 const SDValue &Offset = SN->getOffset();
1764
1765  if (IsWebAssemblyGlobal(Base)) {
1766    if (!Offset->isUndef())
1767 report_fatal_error("unexpected offset when storing to webassembly global",
1768 false);
1769
1770 SDVTList Tys = DAG.getVTList(MVT::Other);
1771 SDValue Ops[] = {SN->getChain(), Value, Base};
1772 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1773 SN->getMemoryVT(), SN->getMemOperand());
1774 }
1775
1776 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1777 if (!Offset->isUndef())
1778 report_fatal_error("unexpected offset when storing to webassembly local",
1779 false);
1780
1781 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1782 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1783 SDValue Ops[] = {SN->getChain(), Idx, Value};
1784 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1785 }
1786
1787  if (WebAssembly::isWasmVarAddressSpace(SN->getAddressSpace()))
1788    report_fatal_error(
1789        "Encountered an unlowerable store to the wasm_var address space",
1790 false);
1791
1792 return Op;
1793}
1794
1795SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1796 SelectionDAG &DAG) const {
1797 SDLoc DL(Op);
1798 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1799 const SDValue &Base = LN->getBasePtr();
1800 const SDValue &Offset = LN->getOffset();
1801
1802  if (IsWebAssemblyGlobal(Base)) {
1803    if (!Offset->isUndef())
1804      report_fatal_error(
1805          "unexpected offset when loading from webassembly global", false);
1806
1807 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1808 SDValue Ops[] = {LN->getChain(), Base};
1809 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1810 LN->getMemoryVT(), LN->getMemOperand());
1811 }
1812
1813 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1814 if (!Offset->isUndef())
1815      report_fatal_error(
1816          "unexpected offset when loading from webassembly local", false);
1817
1818 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1819 EVT LocalVT = LN->getValueType(0);
1820 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1821 {LN->getChain(), Idx});
1822 }
1823
1824  if (WebAssembly::isWasmVarAddressSpace(LN->getAddressSpace()))
1825    report_fatal_error(
1826        "Encountered an unlowerable load from the wasm_var address space",
1827 false);
1828
1829 return Op;
1830}
1831
1832SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1833 SelectionDAG &DAG) const {
1834 assert(Subtarget->hasWideArithmetic());
1835 assert(Op.getValueType() == MVT::i64);
1836 SDLoc DL(Op);
1837 unsigned Opcode;
1838 switch (Op.getOpcode()) {
1839 case ISD::UMUL_LOHI:
1840 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1841 break;
1842 case ISD::SMUL_LOHI:
1843 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1844 break;
1845 default:
1846 llvm_unreachable("unexpected opcode");
1847 }
1848 SDValue LHS = Op.getOperand(0);
1849 SDValue RHS = Op.getOperand(1);
1850 SDValue Lo =
1851 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1852 SDValue Hi(Lo.getNode(), 1);
1853 SDValue Ops[] = {Lo, Hi};
1854 return DAG.getMergeValues(Ops, DL);
1855}
1856
1857// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1858//
1859// This enables generating a single wasm instruction for this operation where
1860// the upper half of both operands are constant zeros. The upper half of the
1861// result is then whether the overflow happened.
1862SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1863 SelectionDAG &DAG) const {
1864 assert(Subtarget->hasWideArithmetic());
1865 assert(Op.getValueType() == MVT::i64);
1866 assert(Op.getOpcode() == ISD::UADDO);
1867 SDLoc DL(Op);
1868 SDValue LHS = Op.getOperand(0);
1869 SDValue RHS = Op.getOperand(1);
1870 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1871 SDValue Result =
1872 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1873 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1874 SDValue CarryI64(Result.getNode(), 1);
1875 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1876 SDValue Ops[] = {Result, CarryI32};
1877 return DAG.getMergeValues(Ops, DL);
1878}
1879
1880SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1881 SelectionDAG &DAG) const {
1882 assert(Subtarget->hasWideArithmetic());
1883 assert(N->getValueType(0) == MVT::i128);
1884 SDLoc DL(N);
1885 unsigned Opcode;
1886 switch (N->getOpcode()) {
1887 case ISD::ADD:
1888 Opcode = WebAssemblyISD::I64_ADD128;
1889 break;
1890 case ISD::SUB:
1891 Opcode = WebAssemblyISD::I64_SUB128;
1892 break;
1893 default:
1894 llvm_unreachable("unexpected opcode");
1895 }
1896 SDValue LHS = N->getOperand(0);
1897 SDValue RHS = N->getOperand(1);
1898
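  // Note: the EXTRACT_ELEMENT nodes below split each i128 operand into its low
  // (index 0) and high (index 1) i64 halves; the wide add/sub node then yields
  // the low half of the result as its first value and the high half as its
  // second.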
1899 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1900 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1901 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1902 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1903 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1904 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1905 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1906 LHS_0, LHS_1, RHS_0, RHS_1);
1907 SDValue Result_HI(Result_LO.getNode(), 1);
1908 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1909}
1910
1911SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1912 SelectionDAG &DAG) const {
1913 SDValue Src = Op.getOperand(2);
1914 if (isa<FrameIndexSDNode>(Src.getNode())) {
1915 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1916 // the FI to some LEA-like instruction, but since we don't have that, we
1917 // need to insert some kind of instruction that can take an FI operand and
1918 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1919 // local.copy between Op and its FI operand.
1920 SDValue Chain = Op.getOperand(0);
1921 SDLoc DL(Op);
1922 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1923 EVT VT = Src.getValueType();
1924 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1925 : WebAssembly::COPY_I64,
1926 DL, VT, Src),
1927 0);
1928 return Op.getNode()->getNumValues() == 1
1929 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1930 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1931 Op.getNumOperands() == 4 ? Op.getOperand(3)
1932 : SDValue());
1933 }
1934 return SDValue();
1935}
1936
1937SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1938 SelectionDAG &DAG) const {
1939 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1940 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1941}
1942
1943SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1944 SelectionDAG &DAG) const {
1945 SDLoc DL(Op);
1946
1947 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1948 fail(DL, DAG,
1949 "Non-Emscripten WebAssembly hasn't implemented "
1950 "__builtin_return_address");
1951 return SDValue();
1952 }
1953
1954 unsigned Depth = Op.getConstantOperandVal(0);
1955 MakeLibCallOptions CallOptions;
1956 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1957 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1958 .first;
1959}
1960
1961SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1962 SelectionDAG &DAG) const {
1963 // Non-zero depths are not supported by WebAssembly currently. Use the
1964 // legalizer's default expansion, which is to return 0 (what this function is
1965 // documented to do).
1966 if (Op.getConstantOperandVal(0) > 0)
1967 return SDValue();
1968
1969  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1970  EVT VT = Op.getValueType();
1971 Register FP =
1972 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1973 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1974}
1975
1976SDValue
1977WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1978 SelectionDAG &DAG) const {
1979 SDLoc DL(Op);
1980 const auto *GA = cast<GlobalAddressSDNode>(Op);
1981
1982 MachineFunction &MF = DAG.getMachineFunction();
1983 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
1984 report_fatal_error("cannot use thread-local storage without bulk memory",
1985 false);
1986
1987 const GlobalValue *GV = GA->getGlobal();
1988
1989 // Currently only Emscripten supports dynamic linking with threads. Therefore,
1990 // on other targets, if we have thread-local storage, only the local-exec
1991 // model is possible.
1992 auto model = Subtarget->getTargetTriple().isOSEmscripten()
1993 ? GV->getThreadLocalMode()
1994                   : GlobalValue::LocalExecTLSModel;
1995
1996 // Unsupported TLS modes
1997  assert(model != GlobalValue::NotThreadLocal);
1998  assert(model != GlobalValue::InitialExecTLSModel);
1999
2000 if (model == GlobalValue::LocalExecTLSModel ||
2001      model == GlobalValue::LocalDynamicTLSModel ||
2002      (model == GlobalValue::GeneralDynamicTLSModel &&
2003       getTargetMachine().shouldAssumeDSOLocal(GV))) {
2004 // For DSO-local TLS variables we use offset from __tls_base
2005
2006 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2007 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2008 : WebAssembly::GLOBAL_GET_I32;
2009 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2010
2011    SDValue BaseAddr(
2012        DAG.getMachineNode(GlobalGet, DL, PtrVT,
2013 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2014 0);
2015
2016 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2017 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2018 SDValue SymOffset =
2019 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2020
2021 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2022 }
2023
2025
2026 EVT VT = Op.getValueType();
2027 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2028 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2029 GA->getOffset(),
2030                                                WebAssemblyII::MO_GOT_TLS));
2031}
2032
2033SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2034 SelectionDAG &DAG) const {
2035 SDLoc DL(Op);
2036 const auto *GA = cast<GlobalAddressSDNode>(Op);
2037 EVT VT = Op.getValueType();
2038 assert(GA->getTargetFlags() == 0 &&
2039 "Unexpected target flags on generic GlobalAddressSDNode");
2040  if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
2041    fail(DL, DAG, "Invalid address space for WebAssembly target");
2042
2043 unsigned OperandFlags = 0;
2044 const GlobalValue *GV = GA->getGlobal();
2045  // Since WebAssembly tables cannot yet be shared across modules, we don't
2046 // need special treatment for tables in PIC mode.
2047 if (isPositionIndependent() &&
2048      !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
2049    if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2050 MachineFunction &MF = DAG.getMachineFunction();
2051 MVT PtrVT = getPointerTy(MF.getDataLayout());
2052 const char *BaseName;
2053 if (GV->getValueType()->isFunctionTy()) {
2054 BaseName = MF.createExternalSymbolName("__table_base");
2055        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2056      } else {
2057 BaseName = MF.createExternalSymbolName("__memory_base");
2058        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2059      }
2060      SDValue BaseAddr =
2061          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2062 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2063
2064 SDValue SymAddr = DAG.getNode(
2065 WebAssemblyISD::WrapperREL, DL, VT,
2066 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2067 OperandFlags));
2068
2069 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2070 }
2071    OperandFlags = WebAssemblyII::MO_GOT;
2072  }
2073
2074 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2075 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2076 GA->getOffset(), OperandFlags));
2077}
2078
2079SDValue
2080WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2081 SelectionDAG &DAG) const {
2082 SDLoc DL(Op);
2083 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2084 EVT VT = Op.getValueType();
2085 assert(ES->getTargetFlags() == 0 &&
2086 "Unexpected target flags on generic ExternalSymbolSDNode");
2087 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2088 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2089}
2090
2091SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2092 SelectionDAG &DAG) const {
2093 // There's no need for a Wrapper node because we always incorporate a jump
2094 // table operand into a BR_TABLE instruction, rather than ever
2095 // materializing it in a register.
2096 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2097 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2098 JT->getTargetFlags());
2099}
2100
2101SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2102 SelectionDAG &DAG) const {
2103 SDLoc DL(Op);
2104 SDValue Chain = Op.getOperand(0);
2105 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2106 SDValue Index = Op.getOperand(2);
2107 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2108
2109  SmallVector<SDValue, 8> Ops;
2110  Ops.push_back(Chain);
2111 Ops.push_back(Index);
2112
2113 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2114 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2115
2116 // Add an operand for each case.
2117 for (auto *MBB : MBBs)
2118 Ops.push_back(DAG.getBasicBlock(MBB));
2119
2120 // Add the first MBB as a dummy default target for now. This will be replaced
2121 // with the proper default target (and the preceding range check eliminated)
2122 // if possible by WebAssemblyFixBrTableDefaults.
2123 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2124 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2125}
2126
2127SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2128 SelectionDAG &DAG) const {
2129 SDLoc DL(Op);
2130 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2131
2132 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2133 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2134
2135 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2136 MFI->getVarargBufferVreg(), PtrVT);
2137 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2138 MachinePointerInfo(SV));
2139}
2140
2141SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2142 SelectionDAG &DAG) const {
2143 MachineFunction &MF = DAG.getMachineFunction();
2144 unsigned IntNo;
2145 switch (Op.getOpcode()) {
2146  case ISD::INTRINSIC_VOID:
2147  case ISD::INTRINSIC_W_CHAIN:
2148    IntNo = Op.getConstantOperandVal(1);
2149 break;
2150  case ISD::INTRINSIC_WO_CHAIN:
2151    IntNo = Op.getConstantOperandVal(0);
2152 break;
2153 default:
2154 llvm_unreachable("Invalid intrinsic");
2155 }
2156 SDLoc DL(Op);
2157
2158 switch (IntNo) {
2159 default:
2160 return SDValue(); // Don't custom lower most intrinsics.
2161
2162 case Intrinsic::wasm_lsda: {
2163 auto PtrVT = getPointerTy(MF.getDataLayout());
2164 const char *SymName = MF.createExternalSymbolName(
2165 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2166 if (isPositionIndependent()) {
2167      SDValue Node = DAG.getTargetExternalSymbol(
2168          SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2169 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2170      SDValue BaseAddr =
2171          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2172 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2173 SDValue SymAddr =
2174 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2175 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2176 }
2177 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2178 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2179 }
2180
2181 case Intrinsic::wasm_shuffle: {
2182 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2183 SDValue Ops[18];
2184 size_t OpIdx = 0;
2185 Ops[OpIdx++] = Op.getOperand(1);
2186 Ops[OpIdx++] = Op.getOperand(2);
2187 while (OpIdx < 18) {
2188 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2189 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2190 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2191 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2192 } else {
2193 Ops[OpIdx++] = MaskIdx;
2194 }
2195 }
2196 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2197 }
2198
2199 case Intrinsic::thread_pointer: {
2200 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2201 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2202 : WebAssembly::GLOBAL_GET_I32;
2203 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2204 return SDValue(
2205 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2206 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2207 0);
2208 }
2209 }
2210}
2211
2212SDValue
2213WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2214 SelectionDAG &DAG) const {
2215 SDLoc DL(Op);
2216 // If sign extension operations are disabled, allow sext_inreg only if operand
2217 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2218 // extension operations, but allowing sext_inreg in this context lets us have
2219 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2220 // everywhere would be simpler in this file, but would necessitate large and
2221 // brittle patterns to undo the expansion and select extract_lane_s
2222 // instructions.
2223 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2224 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2225 return SDValue();
2226
2227 const SDValue &Extract = Op.getOperand(0);
2228 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2229 if (VecT.getVectorElementType().getSizeInBits() > 32)
2230 return SDValue();
2231 MVT ExtractedLaneT =
2232 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2233 MVT ExtractedVecT =
2234 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2235 if (ExtractedVecT == VecT)
2236 return Op;
2237
2238 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2239 const SDNode *Index = Extract.getOperand(1).getNode();
2240 if (!isa<ConstantSDNode>(Index))
2241 return SDValue();
2242 unsigned IndexVal = Index->getAsZExtVal();
2243 unsigned Scale =
2244 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2245 assert(Scale > 1);
2246 SDValue NewIndex =
2247 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2248 SDValue NewExtract = DAG.getNode(
2249      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
2250      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2251 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2252 Op.getOperand(1));
2253}
2254
2255static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2256 SelectionDAG &DAG) {
2257 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2258 return SDValue();
2259
2260 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2261 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2262 "expected extend_low");
2263 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2264
2265 ArrayRef<int> Mask = Shuffle->getMask();
2266 // Look for a shuffle which moves from the high half to the low half.
2267 size_t FirstIdx = Mask.size() / 2;
2268 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2269 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2270 return SDValue();
2271 }
2272 }
2273
2274 SDLoc DL(Op);
2275 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2276 ? WebAssemblyISD::EXTEND_HIGH_S
2277 : WebAssemblyISD::EXTEND_HIGH_U;
2278 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2279}
2280
2281SDValue
2282WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2283 SelectionDAG &DAG) const {
2284 SDLoc DL(Op);
2285 EVT VT = Op.getValueType();
2286 SDValue Src = Op.getOperand(0);
2287 EVT SrcVT = Src.getValueType();
2288
2289 if (SrcVT.getVectorElementType() == MVT::i1 ||
2290 SrcVT.getVectorElementType() == MVT::i64)
2291 return SDValue();
2292
2293 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2294 "Unexpected extension factor.");
2295 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2296
2297 if (Scale != 2 && Scale != 4 && Scale != 8)
2298 return SDValue();
2299
2300 unsigned Ext;
2301 switch (Op.getOpcode()) {
2302  case ISD::ZERO_EXTEND_VECTOR_INREG:
2303    Ext = WebAssemblyISD::EXTEND_LOW_U;
2304 break;
2305  case ISD::SIGN_EXTEND_VECTOR_INREG:
2306    Ext = WebAssemblyISD::EXTEND_LOW_S;
2307 break;
2308 }
2309
2310 if (Scale == 2) {
2311 // See if we can use EXTEND_HIGH.
2312 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2313 return ExtendHigh;
2314 }
2315
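  // Otherwise widen one step at a time: each extend_low doubles the element
  // width and halves the lane count until the requested result type is reached.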
2316 SDValue Ret = Src;
2317 while (Scale != 1) {
2318 Ret = DAG.getNode(Ext, DL,
2319 Ret.getValueType()
2320 .widenIntegerVectorElementType(*DAG.getContext())
2321 .getHalfNumVectorElementsVT(*DAG.getContext()),
2322 Ret);
2323 Scale /= 2;
2324 }
2325 assert(Ret.getValueType() == VT);
2326 return Ret;
2327}
2328
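// Lower a v2f64 BUILD_VECTOR whose two lanes are each converted or promoted
// from lanes of another vector into a single convert_low/promote_low
// instruction, shuffling the source lanes into the low positions first if
// necessary.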
2329static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2330  SDLoc DL(Op);
2331 if (Op.getValueType() != MVT::v2f64)
2332 return SDValue();
2333
2334 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2335 unsigned &Index) -> bool {
2336 switch (Op.getOpcode()) {
2337 case ISD::SINT_TO_FP:
2338 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2339 break;
2340 case ISD::UINT_TO_FP:
2341 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2342 break;
2343 case ISD::FP_EXTEND:
2344 Opcode = WebAssemblyISD::PROMOTE_LOW;
2345 break;
2346 default:
2347 return false;
2348 }
2349
2350 auto ExtractVector = Op.getOperand(0);
2351 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2352 return false;
2353
2354 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2355 return false;
2356
2357 SrcVec = ExtractVector.getOperand(0);
2358 Index = ExtractVector.getConstantOperandVal(1);
2359 return true;
2360 };
2361
2362 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2363 SDValue LHSSrcVec, RHSSrcVec;
2364 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2365 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2366 return SDValue();
2367
2368 if (LHSOpcode != RHSOpcode)
2369 return SDValue();
2370
2371 MVT ExpectedSrcVT;
2372 switch (LHSOpcode) {
2373 case WebAssemblyISD::CONVERT_LOW_S:
2374 case WebAssemblyISD::CONVERT_LOW_U:
2375 ExpectedSrcVT = MVT::v4i32;
2376 break;
2377 case WebAssemblyISD::PROMOTE_LOW:
2378 ExpectedSrcVT = MVT::v4f32;
2379 break;
2380 }
2381 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2382 return SDValue();
2383
2384 auto Src = LHSSrcVec;
2385 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2386 // Shuffle the source vector so that the converted lanes are the low lanes.
2387 Src = DAG.getVectorShuffle(
2388 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2389 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2390 }
2391 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2392}
2393
2394SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2395 SelectionDAG &DAG) const {
2396 MVT VT = Op.getSimpleValueType();
2397 if (VT == MVT::v8f16) {
2398    // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2399 // FP16 type, so cast them to I16s.
2400 MVT IVT = VT.changeVectorElementType(MVT::i16);
2401    SmallVector<SDValue, 8> NewOps;
2402    for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2403 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2404 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2405 return DAG.getBitcast(VT, Res);
2406 }
2407
2408 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2409 return ConvertLow;
2410
2411 SDLoc DL(Op);
2412 const EVT VecT = Op.getValueType();
2413 const EVT LaneT = Op.getOperand(0).getValueType();
2414 const size_t Lanes = Op.getNumOperands();
2415 bool CanSwizzle = VecT == MVT::v16i8;
2416
2417 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2418 // possible number of lanes at once followed by a sequence of replace_lane
2419 // instructions to individually initialize any remaining lanes.
2420
2421 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2422 // swizzled lanes should be given greater weight.
2423
2424 // TODO: Investigate looping rather than always extracting/replacing specific
2425 // lanes to fill gaps.
2426
2427 auto IsConstant = [](const SDValue &V) {
2428 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2429 };
2430
2431 // Returns the source vector and index vector pair if they exist. Checks for:
2432 // (extract_vector_elt
2433 // $src,
2434 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2435 // )
2436 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2437 auto Bail = std::make_pair(SDValue(), SDValue());
2438 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2439 return Bail;
2440 const SDValue &SwizzleSrc = Lane->getOperand(0);
2441 const SDValue &IndexExt = Lane->getOperand(1);
2442 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2443 return Bail;
2444 const SDValue &Index = IndexExt->getOperand(0);
2445 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2446 return Bail;
2447 const SDValue &SwizzleIndices = Index->getOperand(0);
2448 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2449 SwizzleIndices.getValueType() != MVT::v16i8 ||
2450 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2451 Index->getConstantOperandVal(1) != I)
2452 return Bail;
2453 return std::make_pair(SwizzleSrc, SwizzleIndices);
2454 };
2455
2456 // If the lane is extracted from another vector at a constant index, return
2457 // that vector. The source vector must not have more lanes than the dest
2458 // because the shufflevector indices are in terms of the destination lanes and
2459 // would not be able to address the smaller individual source lanes.
2460 auto GetShuffleSrc = [&](const SDValue &Lane) {
2461 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2462 return SDValue();
2463 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2464 return SDValue();
2465 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2466 VecT.getVectorNumElements())
2467 return SDValue();
2468 return Lane->getOperand(0);
2469 };
2470
2471 using ValueEntry = std::pair<SDValue, size_t>;
2472 SmallVector<ValueEntry, 16> SplatValueCounts;
2473
2474 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2475 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2476
2477 using ShuffleEntry = std::pair<SDValue, size_t>;
2478 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2479
2480 auto AddCount = [](auto &Counts, const auto &Val) {
2481 auto CountIt =
2482 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2483 if (CountIt == Counts.end()) {
2484 Counts.emplace_back(Val, 1);
2485 } else {
2486 CountIt->second++;
2487 }
2488 };
2489
2490 auto GetMostCommon = [](auto &Counts) {
2491 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2492 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2493 return *CommonIt;
2494 };
2495
2496 size_t NumConstantLanes = 0;
2497
2498 // Count eligible lanes for each type of vector creation op
2499 for (size_t I = 0; I < Lanes; ++I) {
2500 const SDValue &Lane = Op->getOperand(I);
2501 if (Lane.isUndef())
2502 continue;
2503
2504 AddCount(SplatValueCounts, Lane);
2505
2506 if (IsConstant(Lane))
2507 NumConstantLanes++;
2508 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2509 AddCount(ShuffleCounts, ShuffleSrc);
2510 if (CanSwizzle) {
2511 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2512 if (SwizzleSrcs.first)
2513 AddCount(SwizzleCounts, SwizzleSrcs);
2514 }
2515 }
2516
2517 SDValue SplatValue;
2518 size_t NumSplatLanes;
2519 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2520
2521 SDValue SwizzleSrc;
2522 SDValue SwizzleIndices;
2523 size_t NumSwizzleLanes = 0;
2524 if (SwizzleCounts.size())
2525 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2526 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2527
2528 // Shuffles can draw from up to two vectors, so find the two most common
2529 // sources.
2530 SDValue ShuffleSrc1, ShuffleSrc2;
2531 size_t NumShuffleLanes = 0;
2532 if (ShuffleCounts.size()) {
2533 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2534 llvm::erase_if(ShuffleCounts,
2535 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2536 }
2537 if (ShuffleCounts.size()) {
2538 size_t AdditionalShuffleLanes;
2539 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2540 GetMostCommon(ShuffleCounts);
2541 NumShuffleLanes += AdditionalShuffleLanes;
2542 }
2543
2544 // Predicate returning true if the lane is properly initialized by the
2545 // original instruction
2546 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2547  SDValue Result;
2548  // Prefer swizzles over shuffles over vector consts over splats
2549 if (NumSwizzleLanes >= NumShuffleLanes &&
2550 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2551 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2552 SwizzleIndices);
2553 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2554 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2555 return Swizzled == GetSwizzleSrcs(I, Lane);
2556 };
2557 } else if (NumShuffleLanes >= NumConstantLanes &&
2558 NumShuffleLanes >= NumSplatLanes) {
2559 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2560 size_t DestLaneCount = VecT.getVectorNumElements();
2561 size_t Scale1 = 1;
2562 size_t Scale2 = 1;
2563 SDValue Src1 = ShuffleSrc1;
2564 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2565 if (Src1.getValueType() != VecT) {
2566 size_t LaneSize =
2567          Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2568      assert(LaneSize > DestLaneSize);
2569 Scale1 = LaneSize / DestLaneSize;
2570 Src1 = DAG.getBitcast(VecT, Src1);
2571 }
2572 if (Src2.getValueType() != VecT) {
2573 size_t LaneSize =
2574          Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2575      assert(LaneSize > DestLaneSize);
2576 Scale2 = LaneSize / DestLaneSize;
2577 Src2 = DAG.getBitcast(VecT, Src2);
2578 }
2579
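    // Build the shuffle mask: lanes taken from ShuffleSrc1 keep their original
    // index (scaled if the source was bitcast), lanes from ShuffleSrc2 are
    // offset by the destination lane count, and all other lanes become undef.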
2580 int Mask[16];
2581 assert(DestLaneCount <= 16);
2582 for (size_t I = 0; I < DestLaneCount; ++I) {
2583 const SDValue &Lane = Op->getOperand(I);
2584 SDValue Src = GetShuffleSrc(Lane);
2585 if (Src == ShuffleSrc1) {
2586 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2587 } else if (Src && Src == ShuffleSrc2) {
2588 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2589 } else {
2590 Mask[I] = -1;
2591 }
2592 }
2593 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2594 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2595 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2596 auto Src = GetShuffleSrc(Lane);
2597 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2598 };
2599 } else if (NumConstantLanes >= NumSplatLanes) {
2600 SmallVector<SDValue, 16> ConstLanes;
2601 for (const SDValue &Lane : Op->op_values()) {
2602 if (IsConstant(Lane)) {
2603 // Values may need to be fixed so that they will sign extend to be
2604 // within the expected range during ISel. Check whether the value is in
2605 // bounds based on the lane bit width and if it is out of bounds, lop
2606 // off the extra bits and subtract 2^n to reflect giving the high bit
2607 // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
2608 // cannot possibly be out of range.
2609 auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
2610 int64_t Val = Const ? Const->getSExtValue() : 0;
2611 uint64_t LaneBits = 128 / Lanes;
2612 assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
2613 "Unexpected out of bounds negative value");
2614 if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
2615 uint64_t Mask = (1ll << LaneBits) - 1;
2616 auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask;
2617 ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
2618 } else {
2619 ConstLanes.push_back(Lane);
2620 }
2621 } else if (LaneT.isFloatingPoint()) {
2622 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2623 } else {
2624 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2625 }
2626 }
2627 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2628 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2629 return IsConstant(Lane);
2630 };
2631 } else {
2632 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2633 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2634 (DestLaneSize == 32 || DestLaneSize == 64)) {
2635 // Could be selected to load_zero.
2636 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2637 } else {
2638 // Use a splat (which might be selected as a load splat)
2639 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2640 }
2641 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2642 return Lane == SplatValue;
2643 };
2644 }
2645
2646 assert(Result);
2647 assert(IsLaneConstructed);
2648
2649 // Add replace_lane instructions for any unhandled values
2650 for (size_t I = 0; I < Lanes; ++I) {
2651 const SDValue &Lane = Op->getOperand(I);
2652 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2653 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2654 DAG.getConstant(I, DL, MVT::i32));
2655 }
2656
2657 return Result;
2658}
2659
2660SDValue
2661WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2662 SelectionDAG &DAG) const {
2663 SDLoc DL(Op);
2664 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2665 MVT VecType = Op.getOperand(0).getSimpleValueType();
2666 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2667 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2668
2669 // Space for two vector args and sixteen mask indices
2670 SDValue Ops[18];
2671 size_t OpIdx = 0;
2672 Ops[OpIdx++] = Op.getOperand(0);
2673 Ops[OpIdx++] = Op.getOperand(1);
2674
2675 // Expand mask indices to byte indices and materialize them as operands
2676 for (int M : Mask) {
2677 for (size_t J = 0; J < LaneBytes; ++J) {
2678 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2679 // whole lane of vector input, to allow further reduction at VM. E.g.
2680 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2681 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2682 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2683 }
2684 }
2685
2686 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2687}
2688
2689SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2690 SelectionDAG &DAG) const {
2691 SDLoc DL(Op);
2692 // The legalizer does not know how to expand the unsupported comparison modes
2693 // of i64x2 vectors, so we manually unroll them here.
2694 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2695  SmallVector<SDValue, 2> LHS, RHS;
2696  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2697 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2698 const SDValue &CC = Op->getOperand(2);
2699 auto MakeLane = [&](unsigned I) {
2700 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2701 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2702 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2703 };
2704 return DAG.getBuildVector(Op->getValueType(0), DL,
2705 {MakeLane(0), MakeLane(1)});
2706}
2707
2708SDValue
2709WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2710 SelectionDAG &DAG) const {
2711 // Allow constant lane indices, expand variable lane indices
2712 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2713 if (isa<ConstantSDNode>(IdxNode)) {
2714 // Ensure the index type is i32 to match the tablegen patterns
2715 uint64_t Idx = IdxNode->getAsZExtVal();
2716 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2717 Ops[Op.getNumOperands() - 1] =
2718 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2719 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2720 }
2721 // Perform default expansion
2722 return SDValue();
2723}
2724
2725static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2726  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2727 // 32-bit and 64-bit unrolled shifts will have proper semantics
2728 if (LaneT.bitsGE(MVT::i32))
2729 return DAG.UnrollVectorOp(Op.getNode());
2730 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2731 SDLoc DL(Op);
2732 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2733 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2734 unsigned ShiftOpcode = Op.getOpcode();
2735 SmallVector<SDValue, 16> ShiftedElements;
2736 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2737 SmallVector<SDValue, 16> ShiftElements;
2738 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2739 SmallVector<SDValue, 16> UnrolledOps;
2740 for (size_t i = 0; i < NumLanes; ++i) {
2741 SDValue MaskedShiftValue =
2742 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2743 SDValue ShiftedValue = ShiftedElements[i];
2744 if (ShiftOpcode == ISD::SRA)
2745 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2746 ShiftedValue, DAG.getValueType(LaneT));
2747 UnrolledOps.push_back(
2748 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2749 }
2750 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2751}
2752
2753SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2754 SelectionDAG &DAG) const {
2755 SDLoc DL(Op);
2756
2757 // Only manually lower vector shifts
2758 assert(Op.getSimpleValueType().isVector());
2759
2760 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2761 auto ShiftVal = Op.getOperand(1);
2762
2763 // Try to skip bitmask operation since it is implied inside shift instruction
2764 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2765 if (MaskOp.getOpcode() != ISD::AND)
2766 return MaskOp;
2767 SDValue LHS = MaskOp.getOperand(0);
2768 SDValue RHS = MaskOp.getOperand(1);
2769 if (MaskOp.getValueType().isVector()) {
2770 APInt MaskVal;
2771 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2772 std::swap(LHS, RHS);
2773
2774 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2775 MaskVal == MaskBits)
2776 MaskOp = LHS;
2777 } else {
2778 if (!isa<ConstantSDNode>(RHS.getNode()))
2779 std::swap(LHS, RHS);
2780
2781 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2782 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2783 MaskOp = LHS;
2784 }
2785
2786 return MaskOp;
2787 };
2788
2789 // Skip vector and operation
2790 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2791 ShiftVal = DAG.getSplatValue(ShiftVal);
2792 if (!ShiftVal)
2793 return unrollVectorShift(Op, DAG);
2794
2795 // Skip scalar and operation
2796 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2797 // Use anyext because none of the high bits can affect the shift
2798 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2799
2800 unsigned Opcode;
2801 switch (Op.getOpcode()) {
2802 case ISD::SHL:
2803 Opcode = WebAssemblyISD::VEC_SHL;
2804 break;
2805 case ISD::SRA:
2806 Opcode = WebAssemblyISD::VEC_SHR_S;
2807 break;
2808 case ISD::SRL:
2809 Opcode = WebAssemblyISD::VEC_SHR_U;
2810 break;
2811 default:
2812 llvm_unreachable("unexpected opcode");
2813 }
2814
2815 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2816}
2817
2818SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2819 SelectionDAG &DAG) const {
2820 EVT ResT = Op.getValueType();
2821 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2822
2823 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2824 (SatVT == MVT::i32 || SatVT == MVT::i64))
2825 return Op;
2826
2827 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2828 return Op;
2829
2830 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2831 return Op;
2832
2833 return SDValue();
2834}
2835
2836//===----------------------------------------------------------------------===//
2837// Custom DAG combine hooks
2838//===----------------------------------------------------------------------===//
2839static SDValue
2840performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2841  auto &DAG = DCI.DAG;
2842 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2843
2844 // Hoist vector bitcasts that don't change the number of lanes out of unary
2845 // shuffles, where they are less likely to get in the way of other combines.
2846 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2847 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2848 SDValue Bitcast = N->getOperand(0);
2849 if (Bitcast.getOpcode() != ISD::BITCAST)
2850 return SDValue();
2851 if (!N->getOperand(1).isUndef())
2852 return SDValue();
2853 SDValue CastOp = Bitcast.getOperand(0);
2854 EVT SrcType = CastOp.getValueType();
2855 EVT DstType = Bitcast.getValueType();
2856 if (!SrcType.is128BitVector() ||
2857 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2858 return SDValue();
2859 SDValue NewShuffle = DAG.getVectorShuffle(
2860 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2861 return DAG.getBitcast(DstType, NewShuffle);
2862}
2863
2864/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2865/// split up into scalar instructions during legalization, and the vector
2866/// extending instructions are selected in performVectorExtendCombine below.
2867static SDValue
2868performVectorExtendToFPCombine(SDNode *N,
2869                               TargetLowering::DAGCombinerInfo &DCI) {
2870  auto &DAG = DCI.DAG;
2871 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2872 N->getOpcode() == ISD::SINT_TO_FP);
2873
2874 EVT InVT = N->getOperand(0)->getValueType(0);
2875 EVT ResVT = N->getValueType(0);
2876 MVT ExtVT;
2877 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2878 ExtVT = MVT::v4i32;
2879 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2880 ExtVT = MVT::v2i32;
2881 else
2882 return SDValue();
2883
2884 unsigned Op =
2885      N->getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2886  SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2887 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2888}
2889
2890static SDValue
2891performVectorNonNegToFPCombine(SDNode *N,
2892                               TargetLowering::DAGCombinerInfo &DCI) {
2893  auto &DAG = DCI.DAG;
2894
2895 SDNodeFlags Flags = N->getFlags();
2896 SDValue Op0 = N->getOperand(0);
2897 EVT VT = N->getValueType(0);
2898
2899 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2900 // Depending on the target (runtime) backend, this might be performance
2901 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2902 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2903 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2904 }
2905
2906 return SDValue();
2907}
2908
2909static SDValue
2910performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2911  auto &DAG = DCI.DAG;
2912 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2913 N->getOpcode() == ISD::ZERO_EXTEND);
2914
2915 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2916 // possible before the extract_subvector can be expanded.
2917 auto Extract = N->getOperand(0);
2918 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2919 return SDValue();
2920 auto Source = Extract.getOperand(0);
2921 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2922 if (IndexNode == nullptr)
2923 return SDValue();
2924 auto Index = IndexNode->getZExtValue();
2925
2926 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2927 // extracted subvector is the low or high half of its source.
2928 EVT ResVT = N->getValueType(0);
2929 if (ResVT == MVT::v8i16) {
2930 if (Extract.getValueType() != MVT::v8i8 ||
2931 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2932 return SDValue();
2933 } else if (ResVT == MVT::v4i32) {
2934 if (Extract.getValueType() != MVT::v4i16 ||
2935 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2936 return SDValue();
2937 } else if (ResVT == MVT::v2i64) {
2938 if (Extract.getValueType() != MVT::v2i32 ||
2939 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2940 return SDValue();
2941 } else {
2942 return SDValue();
2943 }
2944
2945 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2946 bool IsLow = Index == 0;
2947
2948 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2949 : WebAssemblyISD::EXTEND_HIGH_S)
2950 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2951 : WebAssemblyISD::EXTEND_HIGH_U);
2952
2953 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2954}
2955
2956static SDValue
2957performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2958  auto &DAG = DCI.DAG;
2959
2960 auto GetWasmConversionOp = [](unsigned Op) {
2961 switch (Op) {
2962    case ISD::FP_TO_SINT_SAT:
2963      return WebAssemblyISD::TRUNC_SAT_ZERO_S;
2964    case ISD::FP_TO_UINT_SAT:
2965      return WebAssemblyISD::TRUNC_SAT_ZERO_U;
2966 case ISD::FP_ROUND:
2967 return WebAssemblyISD::DEMOTE_ZERO;
2968 }
2969 llvm_unreachable("unexpected op");
2970 };
2971
2972 auto IsZeroSplat = [](SDValue SplatVal) {
2973 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
2974 APInt SplatValue, SplatUndef;
2975 unsigned SplatBitSize;
2976 bool HasAnyUndefs;
2977 // Endianness doesn't matter in this context because we are looking for
2978 // an all-zero value.
2979 return Splat &&
2980 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2981 HasAnyUndefs) &&
2982 SplatValue == 0;
2983 };
2984
2985 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
2986 // Combine this:
2987 //
2988 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
2989 //
2990 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
2991 //
2992 // Or this:
2993 //
2994 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
2995 //
2996 // into (f32x4.demote_zero_f64x2 $x).
2997 EVT ResVT;
2998 EVT ExpectedConversionType;
2999 auto Conversion = N->getOperand(0);
3000 auto ConversionOp = Conversion.getOpcode();
3001 switch (ConversionOp) {
3002    case ISD::FP_TO_SINT_SAT:
3003    case ISD::FP_TO_UINT_SAT:
3004      ResVT = MVT::v4i32;
3005 ExpectedConversionType = MVT::v2i32;
3006 break;
3007 case ISD::FP_ROUND:
3008 ResVT = MVT::v4f32;
3009 ExpectedConversionType = MVT::v2f32;
3010 break;
3011 default:
3012 return SDValue();
3013 }
3014
3015 if (N->getValueType(0) != ResVT)
3016 return SDValue();
3017
3018 if (Conversion.getValueType() != ExpectedConversionType)
3019 return SDValue();
3020
3021 auto Source = Conversion.getOperand(0);
3022 if (Source.getValueType() != MVT::v2f64)
3023 return SDValue();
3024
3025 if (!IsZeroSplat(N->getOperand(1)) ||
3026 N->getOperand(1).getValueType() != ExpectedConversionType)
3027 return SDValue();
3028
3029 unsigned Op = GetWasmConversionOp(ConversionOp);
3030 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3031 }
3032
3033 // Combine this:
3034 //
3035 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3036 //
3037 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3038 //
3039 // Or this:
3040 //
3041 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3042 //
3043 // into (f32x4.demote_zero_f64x2 $x).
3044 EVT ResVT;
3045 auto ConversionOp = N->getOpcode();
3046 switch (ConversionOp) {
3047  case ISD::FP_TO_SINT_SAT:
3048  case ISD::FP_TO_UINT_SAT:
3049    ResVT = MVT::v4i32;
3050 break;
3051 case ISD::FP_ROUND:
3052 ResVT = MVT::v4f32;
3053 break;
3054 default:
3055 llvm_unreachable("unexpected op");
3056 }
3057
3058 if (N->getValueType(0) != ResVT)
3059 return SDValue();
3060
3061 auto Concat = N->getOperand(0);
3062 if (Concat.getValueType() != MVT::v4f64)
3063 return SDValue();
3064
3065 auto Source = Concat.getOperand(0);
3066 if (Source.getValueType() != MVT::v2f64)
3067 return SDValue();
3068
3069 if (!IsZeroSplat(Concat.getOperand(1)) ||
3070 Concat.getOperand(1).getValueType() != MVT::v2f64)
3071 return SDValue();
3072
3073 unsigned Op = GetWasmConversionOp(ConversionOp);
3074 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3075}
3076
3077// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3078static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3079 const SDLoc &DL, unsigned VectorWidth) {
3080 EVT VT = Vec.getValueType();
3081 EVT ElVT = VT.getVectorElementType();
3082 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3083 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3084 VT.getVectorNumElements() / Factor);
3085
3086 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3087 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3088 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3089
3090 // This is the index of the first element of the VectorWidth-bit chunk
3091 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3092 IdxVal &= ~(ElemsPerChunk - 1);
3093
3094 // If the input is a buildvector just emit a smaller one.
3095 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3096 return DAG.getBuildVector(ResultVT, DL,
3097 Vec->ops().slice(IdxVal, ElemsPerChunk));
3098
3099 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3100 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3101}
3102
3103// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3104// is the expected destination value type after recursion. In is the initial
3105// input. Note that the input should have enough leading zero bits to prevent
3106// NARROW_U from saturating results.
3107static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3108                                        SelectionDAG &DAG) {
3109 EVT SrcVT = In.getValueType();
3110
3111 // No truncation required, we might get here due to recursive calls.
3112 if (SrcVT == DstVT)
3113 return In;
3114
3115 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3116 unsigned NumElems = SrcVT.getVectorNumElements();
3117 if (!isPowerOf2_32(NumElems))
3118 return SDValue();
3119 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3120 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3121
3122 LLVMContext &Ctx = *DAG.getContext();
3123 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3124
3125 // Narrow to the largest type possible:
3126 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3127 EVT InVT = MVT::i16, OutVT = MVT::i8;
3128 if (SrcVT.getScalarSizeInBits() > 16) {
3129 InVT = MVT::i32;
3130 OutVT = MVT::i16;
3131 }
3132 unsigned SubSizeInBits = SrcSizeInBits / 2;
3133 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3134 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3135
3136 // Split lower/upper subvectors.
3137 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3138 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3139
3140 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3141 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3142 Lo = DAG.getBitcast(InVT, Lo);
3143 Hi = DAG.getBitcast(InVT, Hi);
3144 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3145 return DAG.getBitcast(DstVT, Res);
3146 }
3147
3148 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3149 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3150 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3151 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3152
3153 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3154 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3155 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3156}
3157
3158static SDValue performTruncateCombine(SDNode *N,
3159                                      TargetLowering::DAGCombinerInfo &DCI) {
3160  auto &DAG = DCI.DAG;
3161
3162 SDValue In = N->getOperand(0);
3163 EVT InVT = In.getValueType();
3164 if (!InVT.isSimple())
3165 return SDValue();
3166
3167 EVT OutVT = N->getValueType(0);
3168 if (!OutVT.isVector())
3169 return SDValue();
3170
3171 EVT OutSVT = OutVT.getVectorElementType();
3172 EVT InSVT = InVT.getVectorElementType();
3173 // Currently only cover truncate to v16i8 or v8i16.
3174 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3175 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3176 return SDValue();
3177
3178 SDLoc DL(N);
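  // Clear the bits above the destination element width first so that the
  // unsigned narrowing operations below cannot saturate the values.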
3179  APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3180                                    OutVT.getScalarSizeInBits());
3181 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3182 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3183}
3184
3185static SDValue performBitcastCombine(SDNode *N,
3186                                     TargetLowering::DAGCombinerInfo &DCI) {
3187  using namespace llvm::SDPatternMatch;
3188 auto &DAG = DCI.DAG;
3189 SDLoc DL(N);
3190 SDValue Src = N->getOperand(0);
3191 EVT VT = N->getValueType(0);
3192 EVT SrcVT = Src.getValueType();
3193
3194 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3195 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3196 return SDValue();
3197
3198 unsigned NumElts = SrcVT.getVectorNumElements();
3199 EVT Width = MVT::getIntegerVT(128 / NumElts);
3200
3201 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3202 // ==> bitmask
3203 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3204 return DAG.getZExtOrTrunc(
3205 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3206 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3207 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3208 SrcVT.changeVectorElementType(Width))}),
3209 DL, VT);
3210 }
3211
3212 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3213 if (NumElts == 32 || NumElts == 64) {
3214    // Strategy: setcc each 128-bit chunk separately as v16i8 -> v16i1,
3215    // bitcast each result to i16 and extend it to i32 or i64, then
3216    // accumulate the chunks by shifting left by 16 and adding.
3217 SDValue Concat, SetCCVector;
3218 ISD::CondCode SetCond;
3219
3220 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3221 m_CondCode(SetCond)))))
3222 return SDValue();
3223 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3224 return SDValue();
3225
3226 uint64_t ElementWidth =
3227        SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
3228
3229 SmallVector<SDValue> VectorsToShuffle;
3230 for (size_t I = 0; I < Concat->ops().size(); I++) {
3231 VectorsToShuffle.push_back(DAG.getBitcast(
3232 MVT::i16,
3233 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3234 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3235 DAG, DL, 128),
3236 SetCond)));
3237 }
3238
3239 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3240 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3241
3242 for (SDValue V : VectorsToShuffle) {
3243 ReturningInteger = DAG.getNode(
3244 ISD::SHL, DL, ReturnType,
3245 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3246
3247 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3248 ReturningInteger =
3249 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3250 }
3251
3252 return ReturningInteger;
3253 }
3254
3255 return SDValue();
3256}
3257
3258static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3259  // any_true (setcc <X>, 0, eq) => (not (all_true X))
3260 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3261 // any_true (setcc <X>, 0, ne) => (any_true X)
3262 // all_true (setcc <X>, 0, ne) => (all_true X)
3263 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3264 using namespace llvm::SDPatternMatch;
3265
3266 SDValue LHS;
3267 if (N->getNumOperands() < 2 ||
3268 !sd_match(N->getOperand(1),
3269                m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
3270    return SDValue();
3271 EVT LT = LHS.getValueType();
3272 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3273 return SDValue();
3274
3275 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3276 ISD::CondCode SetType,
3277 Intrinsic::WASMIntrinsics InPost) {
3278 if (N->getConstantOperandVal(0) != InPre)
3279 return SDValue();
3280
3281 SDValue LHS;
3282 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3283 m_SpecificCondCode(SetType))))
3284 return SDValue();
3285
3286 SDLoc DL(N);
3287 SDValue Ret = DAG.getZExtOrTrunc(
3288 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3289 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3290 DL, MVT::i1);
3291 if (SetType == ISD::SETEQ)
3292 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3293 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3294 };
3295
3296 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3297 Intrinsic::wasm_alltrue))
3298 return AnyTrueEQ;
3299 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3300 Intrinsic::wasm_anytrue))
3301 return AllTrueEQ;
3302 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3303 Intrinsic::wasm_anytrue))
3304 return AnyTrueNE;
3305 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3306 Intrinsic::wasm_alltrue))
3307 return AllTrueNE;
3308
3309 return SDValue();
3310}
3311
3312template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3313 Intrinsic::ID Intrin>
3314 static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3315 SDValue LHS = N->getOperand(0);
3316 SDValue RHS = N->getOperand(1);
3317 SDValue Cond = N->getOperand(2);
3318 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3319 return SDValue();
3320
3321 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3322 return SDValue();
3323
3324 SDLoc DL(N);
3325 SDValue Ret = DAG.getZExtOrTrunc(
3326 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3327 {DAG.getConstant(Intrin, DL, MVT::i32),
3328 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3329 DL, MVT::i1);
3330 if (RequiresNegate)
3331 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3332 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3333}
3334
3335 /// Try to convert an i128 comparison to a v16i8 comparison before type
3336 /// legalization splits it up into chunks.
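/// For example (illustrative), an i128 equality test whose operands are cheap
/// to materialize as vectors can be bitcast to v16i8, compared lane-wise, and
/// reduced with all_true/any_true instead of being split into scalar compares.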
3337 static SDValue
3338 combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3339 const WebAssemblySubtarget *Subtarget) {
3340
3341 SDLoc DL(N);
3342 SDValue X = N->getOperand(0);
3343 SDValue Y = N->getOperand(1);
3344 EVT VT = N->getValueType(0);
3345 EVT OpVT = X.getValueType();
3346
3347 SelectionDAG &DAG = DCI.DAG;
3348 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3349 Attribute::NoImplicitFloat))
3350 return SDValue();
3351
3352 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3353 // We're looking for an oversized integer equality comparison with SIMD
3354 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3355 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3356 return SDValue();
3357
3358 // Don't perform this combine if constructing the vector will be expensive.
3359 auto IsVectorBitCastCheap = [](SDValue X) {
3360 X = peekThroughBitcasts(X);
3361 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3362 };
3363
3364 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3365 return SDValue();
3366
3367 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3368 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3369 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3370
3371 SDValue Intr =
3372 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3373 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3374 : Intrinsic::wasm_anytrue,
3375 DL, MVT::i32),
3376 Cmp});
3377
3378 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3379 ISD::SETNE);
3380}
3381
3382 static SDValue
3383 performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3384 const WebAssemblySubtarget *Subtarget) {
3385 if (!DCI.isBeforeLegalize())
3386 return SDValue();
3387
3388 EVT VT = N->getValueType(0);
3389 if (!VT.isScalarInteger())
3390 return SDValue();
3391
3392 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3393 return V;
3394
3395 SDValue LHS = N->getOperand(0);
3396 if (LHS->getOpcode() != ISD::BITCAST)
3397 return SDValue();
3398
3399 EVT FromVT = LHS->getOperand(0).getValueType();
3400 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3401 return SDValue();
3402
3403 unsigned NumElts = FromVT.getVectorNumElements();
3404 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3405 return SDValue();
3406
3407 if (!cast<ConstantSDNode>(N->getOperand(1)))
3408 return SDValue();
3409
3410 EVT VecVT = FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts));
3411 auto &DAG = DCI.DAG;
3412 // setcc (iN (bitcast (vNi1 X))), 0, ne
3413 // ==> any_true (vNi1 X)
3414 if (SDValue Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3415 N, VecVT, DAG)) {
3416 return Match;
3417 }
3418 // setcc (iN (bitcast (vNi1 X))), 0, eq
3419 // ==> xor (any_true (vNi1 X)), -1
3420 if (SDValue Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3421 N, VecVT, DAG)) {
3422 return Match;
3423 }
3424 // setcc (iN (bitcast (vNi1 X))), -1, eq
3425 // ==> all_true (vNi1 X)
3426 if (SDValue Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3427 N, VecVT, DAG)) {
3428 return Match;
3429 }
3430 // setcc (iN (bitcast (vNi1 X))), -1, ne
3431 // ==> xor (all_true (vNi1 X)), -1
3432 if (SDValue Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3433 N, VecVT, DAG)) {
3434 return Match;
3435 }
3436 return SDValue();
3437}
3438
3439 static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3440 EVT VT = N->getValueType(0);
3441 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3442 return SDValue();
3443
3444 // Mul with extending inputs.
3445 SDValue LHS = N->getOperand(0);
3446 SDValue RHS = N->getOperand(1);
3447 if (LHS.getOpcode() != RHS.getOpcode())
3448 return SDValue();
3449
3450 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3451 LHS.getOpcode() != ISD::ZERO_EXTEND)
3452 return SDValue();
3453
3454 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3455 return SDValue();
3456
3457 EVT FromVT = LHS->getOperand(0).getValueType();
3458 EVT EltTy = FromVT.getVectorElementType();
3459 if (EltTy != MVT::i8)
3460 return SDValue();
3461
3462 // For an input DAG that looks like this
3463 // %a = input_type
3464 // %b = input_type
3465 // %lhs = extend %a to output_type
3466 // %rhs = extend %b to output_type
3467 // %mul = mul %lhs, %rhs
3468
3469 // input_type | output_type | instructions
3470 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3471 // | | %high = i16x8.extmul_high_i8x16_ %a, %b
3472 // | | %low_low = i32x4.ext_low_i16x8_ %low
3473 // | | %low_high = i32x4.ext_high_i16x8_ %low
3474 // | | %high_low = i32x4.ext_low_i16x8_ %high
3475 // | | %high_high = i32x4.ext_high_i16x8_ %high
3476 // | | %res = concat_vector(...)
3477 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3478 // | | %low_low = i32x4.ext_low_i16x8_ %low
3479 // | | %low_high = i32x4.ext_high_i16x8_ %low
3480 // | | %res = concat_vector(%low_low, %low_high)
3481
3482 SDLoc DL(N);
3483 unsigned NumElts = VT.getVectorNumElements();
3484 SDValue ExtendInLHS = LHS->getOperand(0);
3485 SDValue ExtendInRHS = RHS->getOperand(0);
3486 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3487 unsigned ExtendLowOpc =
3488 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3489 unsigned ExtendHighOpc =
3490 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3491
3492 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3493 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3494 };
3495 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3496 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3497 };
3498
3499 if (NumElts == 16) {
3500 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3501 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3502 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3503 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3504 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3505 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3506 SDValue SubVectors[] = {
3507 GetExtendLow(MVT::v4i32, MulLow),
3508 GetExtendHigh(MVT::v4i32, MulLow),
3509 GetExtendLow(MVT::v4i32, MulHigh),
3510 GetExtendHigh(MVT::v4i32, MulHigh),
3511 };
3512 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3513 } else {
3514 assert(NumElts == 8);
3515 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3516 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3517 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3518 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3519 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3520 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3521 }
3522 return SDValue();
3523}
3524
3525 static SDValue performMulCombine(SDNode *N,
3526 TargetLowering::DAGCombinerInfo &DCI) {
3527 assert(N->getOpcode() == ISD::MUL);
3528 EVT VT = N->getValueType(0);
3529 if (!VT.isVector())
3530 return SDValue();
3531
3532 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3533 return Res;
3534
3535 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3536 // extend them to v8i16. Only do this before legalization in case a narrow
3537 // vector is widened and may be simplified later.
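// This is sound because the low 8 bits of each product depend only on the low
// 8 bits of the operands, so multiplying zero-extended values in i16 lanes and
// keeping the low byte of every lane matches a native i8 multiply.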
3538 if (!DCI.isBeforeLegalize() || (VT != MVT::v8i8 && VT != MVT::v16i8))
3539 return SDValue();
3540
3541 SDLoc DL(N);
3542 SelectionDAG &DAG = DCI.DAG;
3543 SDValue LHS = N->getOperand(0);
3544 SDValue RHS = N->getOperand(1);
3545 EVT MulVT = MVT::v8i16;
3546
3547 if (VT == MVT::v8i8) {
3548 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3549 DAG.getUNDEF(MVT::v8i8));
3550 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3551 DAG.getUNDEF(MVT::v8i8));
3552 SDValue LowLHS =
3553 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3554 SDValue LowRHS =
3555 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3556 SDValue MulLow = DAG.getBitcast(
3557 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3558 // Take the low byte of each lane.
3559 SDValue Shuffle = DAG.getVectorShuffle(
3560 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3561 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3562 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3563 } else {
3564 assert(VT == MVT::v16i8 && "Expected v16i8");
3565 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3566 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3567 SDValue HighLHS =
3568 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3569 SDValue HighRHS =
3570 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3571
3572 SDValue MulLow =
3573 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3574 SDValue MulHigh =
3575 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3576
3577 // Take the low byte of each lane.
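// In the v16i8 view of a v8i16 vector the low byte of lane i sits at byte 2*i,
// so selecting the even bytes of MulLow followed by the even bytes of MulHigh
// reassembles the sixteen truncated products in lane order.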
3578 return DAG.getVectorShuffle(
3579 VT, DL, MulLow, MulHigh,
3580 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3581 }
3582}
3583
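// Recursively doubles the element count of In by concatenating it with poison
// until the vector holds at least RequiredNumElems elements.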
3584SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3585 SelectionDAG &DAG) {
3586 SDLoc DL(In);
3587 LLVMContext &Ctx = *DAG.getContext();
3588 EVT InVT = In.getValueType();
3589 unsigned NumElems = InVT.getVectorNumElements() * 2;
3590 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3591 SDValue Concat =
3592 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3593 if (NumElems < RequiredNumElems) {
3594 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3595 }
3596 return Concat;
3597}
3598
3599 SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
3600 EVT OutVT = N->getValueType(0);
3601 if (!OutVT.isVector())
3602 return SDValue();
3603
3604 EVT OutElTy = OutVT.getVectorElementType();
3605 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3606 return SDValue();
3607
3608 unsigned NumElems = OutVT.getVectorNumElements();
3609 if (!isPowerOf2_32(NumElems))
3610 return SDValue();
3611
3612 EVT FPVT = N->getOperand(0)->getValueType(0);
3613 if (FPVT.getVectorElementType() != MVT::f32)
3614 return SDValue();
3615
3616 SDLoc DL(N);
3617
3618 // First, convert to i32.
3619 LLVMContext &Ctx = *DAG.getContext();
3620 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3621 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3622 APInt Mask = APInt::getLowBitsSet(IntVT.getScalarSizeInBits(),
3623 OutVT.getScalarSizeInBits());
3624 // Mask out the top MSBs.
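// Keeping only the low bits that fit the output element type means the
// narrowing step below (which saturates) never needs to clamp a value, so it
// behaves like a plain lane-wise truncation.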
3625 SDValue Masked =
3626 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3627
3628 if (OutVT.getSizeInBits() < 128) {
3629 // Create a wide enough vector that we can use narrow.
3630 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3631 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3632 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3633 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3634 return DAG.getBitcast(
3635 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3636 } else {
3637 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3638 }
3639 return SDValue();
3640}
3641
3642SDValue
3643WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3644 DAGCombinerInfo &DCI) const {
3645 switch (N->getOpcode()) {
3646 default:
3647 return SDValue();
3648 case ISD::BITCAST:
3649 return performBitcastCombine(N, DCI);
3650 case ISD::SETCC:
3651 return performSETCCCombine(N, DCI, Subtarget);
3652 case ISD::VECTOR_SHUFFLE:
3653 return performVECTOR_SHUFFLECombine(N, DCI);
3654 case ISD::SIGN_EXTEND:
3655 case ISD::ZERO_EXTEND:
3656 return performVectorExtendCombine(N, DCI);
3657 case ISD::UINT_TO_FP:
3658 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3659 return ExtCombine;
3660 return performVectorNonNegToFPCombine(N, DCI);
3661 case ISD::SINT_TO_FP:
3662 return performVectorExtendToFPCombine(N, DCI);
3663 case ISD::FP_TO_SINT_SAT:
3664 case ISD::FP_TO_UINT_SAT:
3665 case ISD::FP_ROUND:
3666 case ISD::CONCAT_VECTORS:
3667 return performVectorTruncZeroCombine(N, DCI);
3668 case ISD::FP_TO_SINT:
3669 case ISD::FP_TO_UINT:
3670 return performConvertFPCombine(N, DCI.DAG);
3671 case ISD::TRUNCATE:
3672 return performTruncateCombine(N, DCI);
3673 case ISD::INTRINSIC_WO_CHAIN:
3674 return performAnyAllCombine(N, DCI.DAG);
3675 case ISD::MUL:
3676 return performMulCombine(N, DCI);
3677 }
3678}