LLVM 23.0.0git
WebAssemblyTargetTransformInfo.cpp
Go to the documentation of this file.
1//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the WebAssembly-specific TargetTransformInfo
11/// implementation.
12///
13//===----------------------------------------------------------------------===//
14
17#include "llvm/IR/IntrinsicsWebAssembly.h"
19
21using namespace llvm;
22
23#define DEBUG_TYPE "wasmtti"
24
26WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
27 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
29}
30
31unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
32 unsigned Result = BaseT::getNumberOfRegisters(ClassID);
33
34 // For SIMD, use at least 16 registers, as a rough guess.
35 bool Vector = (ClassID == 1);
36 if (Vector)
37 Result = std::max(Result, 16u);
38
39 return Result;
40}
41
44 switch (K) {
46 return TypeSize::getFixed(64);
48 return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
50 return TypeSize::getScalable(0);
51 }
52
53 llvm_unreachable("Unsupported register kind");
54}
55
57 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
59 ArrayRef<const Value *> Args, const Instruction *CxtI) const {
60
61 if (ST->hasSIMD128()) {
62 static const CostTblEntry ArithCostTbl[]{
63 // extmul + (maybe awkward) shuffle
64 {ISD::MUL, MVT::v8i8, 4},
65 // 2x extmul + (okay) shuffle
66 {ISD::MUL, MVT::v16i8, 4},
67 // extmul
68 {ISD::MUL, MVT::v4i16, 1},
69 // extmul
70 {ISD::MUL, MVT::v2i32, 1},
71 };
72 EVT DstVT = TLI->getValueType(DL, Ty);
73 if (DstVT.isSimple()) {
74 int ISD = TLI->InstructionOpcodeToISD(Opcode);
75 if (const auto *Entry =
76 CostTableLookup(ArithCostTbl, ISD, DstVT.getSimpleVT()))
77 return Entry->Cost;
78 }
79 }
80
83 Opcode, Ty, CostKind, Op1Info, Op2Info);
84
85 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
86 switch (Opcode) {
87 case Instruction::LShr:
88 case Instruction::AShr:
89 case Instruction::Shl:
90 // SIMD128's shifts currently only accept a scalar shift count. For each
91 // element, we'll need to extract, op, insert. The following is a rough
92 // approximation.
93 if (!Op2Info.isUniform())
94 Cost =
95 cast<FixedVectorType>(VTy)->getNumElements() *
97 getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
99 break;
100 }
101 }
102 return Cost;
103}
104
106 unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH,
108 int ISD = TLI->InstructionOpcodeToISD(Opcode);
109 auto SrcTy = TLI->getValueType(DL, Src);
110 auto DstTy = TLI->getValueType(DL, Dst);
111
112 if (!SrcTy.isSimple() || !DstTy.isSimple()) {
113 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
114 }
115
116 if (!ST->hasSIMD128()) {
117 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
118 }
119
120 auto DstVT = DstTy.getSimpleVT();
121 auto SrcVT = SrcTy.getSimpleVT();
122
123 if (I && I->hasOneUser()) {
124 auto *SingleUser = cast<Instruction>(*I->user_begin());
125 int UserISD = TLI->InstructionOpcodeToISD(SingleUser->getOpcode());
126
127 // extmul_low support
128 if (UserISD == ISD::MUL &&
130 // Free low extensions.
131 if ((SrcVT == MVT::v8i8 && DstVT == MVT::v8i16) ||
132 (SrcVT == MVT::v4i16 && DstVT == MVT::v4i32) ||
133 (SrcVT == MVT::v2i32 && DstVT == MVT::v2i64)) {
134 return 0;
135 }
136 // Will require an additional extlow operation for the intermediate
137 // i16/i32 value.
138 if ((SrcVT == MVT::v4i8 && DstVT == MVT::v4i32) ||
139 (SrcVT == MVT::v2i16 && DstVT == MVT::v2i64)) {
140 return 1;
141 }
142 }
143 }
144
145 static constexpr TypeConversionCostTblEntry ConversionTbl[] = {
146 // extend_low
147 {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1},
148 {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1},
149 {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1},
150 {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1},
151 {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1},
152 {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1},
153 // 2 x extend_low
154 {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2},
155 {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2},
156 {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2},
157 {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2},
158 // extend_low, extend_high
159 {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2},
160 {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2},
161 {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2},
162 {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2},
163 {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2},
164 {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2},
165 // 2x extend_low, extend_high
166 {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 4},
167 {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 4},
168 {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4},
169 {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4},
170 // 6x extend_low, extend_high
171 {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6},
172 {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6},
173 // shuffle
174 {ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 2},
175 {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 4},
176 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 2},
177 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 4},
178 // narrow, and
179 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2},
180 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2},
181 // narrow, 2x and
182 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3},
183 // 3x narrow, 4x and
184 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 7},
185 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7},
186 // 7x narrow, 8x and
187 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 15},
188 // convert_i32x4
189 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
190 {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
191 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
192 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
193 // extend_low, convert
194 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
195 {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
196 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
197 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
198 // extend_low x 2, convert
199 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
200 {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
201 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
202 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
203 // several shuffles
204 {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
205 {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
206 {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
207 {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
208 /// trunc_sat, const, and, 3x narrow
209 {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 6},
210 {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 6},
211 {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 6},
212 {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 6},
213 /// trunc_sat, const, and, narrow
214 {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 4},
215 {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 4},
216 {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4},
217 {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4},
218 // 2x trunc_sat, const, 2x and, 3x narrow
219 {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 8},
220 {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 8},
221 // 2x trunc_sat, const, 2x and, narrow
222 {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 6},
223 {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 6},
224 };
225
226 if (const auto *Entry =
227 ConvertCostTableLookup(ConversionTbl, ISD, DstVT, SrcVT)) {
228 return Entry->Cost;
229 }
230
231 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
232}
233
235WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
237
238 Options.AllowOverlappingLoads = true;
239
240 if (ST->hasSIMD128())
241 Options.LoadSizes.push_back(16);
242
243 Options.LoadSizes.append({8, 4, 2, 1});
244 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
245 Options.NumLoadsPerBlock = Options.MaxNumLoads;
246
247 return Options;
248}
249
251 unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
253 const Instruction *I) const {
254 if (!ST->hasSIMD128() || !isa<FixedVectorType>(Ty)) {
255 return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
256 CostKind);
257 }
258
259 EVT VT = TLI->getValueType(DL, Ty, true);
260 // Type legalization can't handle structs
261 if (VT == MVT::Other)
262 return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
263 CostKind);
264
265 auto LT = getTypeLegalizationCost(Ty);
266 if (!LT.first.isValid())
268
269 int ISD = TLI->InstructionOpcodeToISD(Opcode);
270 unsigned width = VT.getSizeInBits();
271 if (ISD == ISD::LOAD) {
272 // 128-bit loads are a single instruction. 32-bit and 64-bit vector loads
273 // can be lowered to load32_zero and load64_zero respectively. Assume SIMD
274 // loads are twice as expensive as scalar.
275 switch (width) {
276 default:
277 break;
278 case 32:
279 case 64:
280 case 128:
281 return 2;
282 }
283 } else if (ISD == ISD::STORE) {
284 // For stores, we can use store lane operations.
285 switch (width) {
286 default:
287 break;
288 case 8:
289 case 16:
290 case 32:
291 case 64:
292 case 128:
293 return 2;
294 }
295 }
296
297 return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, CostKind);
298}
299
301 TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
304 const Instruction *CxtI) const {
305 // Canonicalize the ShuffleKind in case optimizations didn't.
306 // Otherwise, we might end up with the wrong ShuffleKind to match against.
307
308 Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp);
309
310 // Wasm SIMD128 has native splat instructions for all lane types.
311 if (ST->hasSIMD128() && Kind == TTI::SK_Broadcast &&
313 return 1;
314
315 return BaseT::getShuffleCost(Kind, DstTy, SrcTy, Mask, CostKind, Index, SubTp,
316 Args, CxtI);
317}
318
320 unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef<unsigned> Indices,
321 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
322 bool UseMaskForCond, bool UseMaskForGaps) const {
323 assert(Factor >= 2 && "Invalid interleave factor");
324
325 auto *VecTy = cast<VectorType>(Ty);
326 if (!ST->hasSIMD128() || !isa<FixedVectorType>(VecTy)) {
328 }
329
330 if (UseMaskForCond || UseMaskForGaps)
331 return BaseT::getInterleavedMemoryOpCost(Opcode, Ty, Factor, Indices,
332 Alignment, AddressSpace, CostKind,
333 UseMaskForCond, UseMaskForGaps);
334
335 constexpr unsigned MaxInterleaveFactor = 4;
336 if (Factor <= MaxInterleaveFactor) {
337 unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
338 // Ensure the number of vector elements is greater than 1.
339 if (MinElts < 2 || MinElts % Factor != 0)
341
342 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
343 // Ensure the element type is legal.
344 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
346
347 if (Factor != 2 && Factor != 4)
349
350 auto *SubVecTy =
351 VectorType::get(VecTy->getElementType(),
352 VecTy->getElementCount().divideCoefficientBy(Factor));
353 InstructionCost MemCost =
354 getMemoryOpCost(Opcode, SubVecTy, Alignment, AddressSpace, CostKind);
355
356 unsigned VecSize = DL.getTypeSizeInBits(SubVecTy);
357 unsigned MaxVecSize = 128;
358 unsigned NumAccesses =
359 std::max<unsigned>(1, (MinElts * ElSize + MaxVecSize - 1) / VecSize);
360
361 // A stride of two is commonly supported via dedicated instructions, so it
362 // should be relatively cheap for all element sizes. A stride of four is
363 // more expensive as it will likely require more shuffles. Using two
364 // simd128 inputs is considered more expensive and we mainly account for
365 // shuffling two inputs (32 bytes), but we do model 4 x v4i32 to enable
366 // arithmetic kernels.
367 static const CostTblEntry ShuffleCostTbl[] = {
368 // One reg.
369 {2, MVT::v2i8, 1}, // interleave 2 x 2i8 into 4i8
370 {2, MVT::v4i8, 1}, // interleave 2 x 4i8 into 8i8
371 {2, MVT::v8i8, 1}, // interleave 2 x 8i8 into 16i8
372 {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16
373 {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16
374 {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32
375
376 // Two regs.
377 {2, MVT::v16i8, 2}, // interleave 2 x 16i8 into 32i8
378 {2, MVT::v8i16, 2}, // interleave 2 x 8i16 into 16i16
379 {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32
380
381 // One reg.
382 {4, MVT::v2i8, 4}, // interleave 4 x 2i8 into 8i8
383 {4, MVT::v4i8, 4}, // interleave 4 x 4i8 into 16i8
384 {4, MVT::v2i16, 4}, // interleave 4 x 2i16 into 8i16
385
386 // Two regs.
387 {4, MVT::v8i8, 16}, // interleave 4 x 8i8 into 32i8
388 {4, MVT::v4i16, 8}, // interleave 4 x 4i16 into 16i16
389 {4, MVT::v2i32, 4}, // interleave 4 x 2i32 into 8i32
390
391 // Four regs.
392 {4, MVT::v4i32, 16}, // interleave 4 x 4i32 into 16i32
393 };
394
395 EVT ETy = TLI->getValueType(DL, SubVecTy);
396 if (const auto *Entry =
397 CostTableLookup(ShuffleCostTbl, Factor, ETy.getSimpleVT()))
398 return Entry->Cost + (NumAccesses * MemCost);
399 }
400
401 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
402 Alignment, AddressSpace, CostKind,
403 UseMaskForCond, UseMaskForGaps);
404}
405
407 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
408 const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const {
410 Opcode, Val, CostKind, Index, Op0, Op1, VIC);
411
412 // SIMD128's insert/extract currently only take constant indices.
413 if (Index == -1u)
415
416 return Cost;
417}
418
420 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
422 TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
423 TTI::TargetCostKind CostKind, std::optional<FastMathFlags> FMF) const {
425 if (!VF.isFixed() || !ST->hasSIMD128())
426 return Invalid;
427
429 return Invalid;
430
431 if (Opcode != Instruction::Add)
432 return Invalid;
433
434 EVT AccumEVT = EVT::getEVT(AccumType);
435 // TODO: Add i64 accumulator.
436 if (AccumEVT != MVT::i32)
437 return Invalid;
438
439 // Possible options:
440 // - i16x8.extadd_pairwise_i8x16_sx
441 // - i32x4.extadd_pairwise_i16x8_sx
442 // - i32x4.dot_i16x8_s
443 // Only try to support dot, for now.
444
445 EVT InputEVT = EVT::getEVT(InputTypeA);
446 if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) ||
447 (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) {
448 return Invalid;
449 }
450
451 if (OpAExtend == TTI::PR_None)
452 return Invalid;
453
455 if (!BinOp)
456 return Cost;
457
458 if (OpAExtend != OpBExtend)
459 return Invalid;
460
461 if (*BinOp != Instruction::Mul)
462 return Invalid;
463
464 if (InputTypeA != InputTypeB)
465 return Invalid;
466
467 // Signed inputs can lower to dot
468 if (InputEVT == MVT::i16 && VF.getFixedValue() == 8)
469 return OpAExtend == TTI::PR_SignExtend ? Cost : Cost * 2;
470
471 // Double the size of the lowered sequence.
472 if (InputEVT == MVT::i8 && VF.getFixedValue() == 16)
473 return OpAExtend == TTI::PR_SignExtend ? Cost * 2 : Cost * 4;
474
475 return Invalid;
476}
477
479 const IntrinsicInst *II) const {
480
481 switch (II->getIntrinsicID()) {
482 default:
483 break;
484 case Intrinsic::vector_reduce_fadd:
486 }
488}
489
492 OptimizationRemarkEmitter *ORE) const {
493 // Scan the loop: don't unroll loops with calls. This is a standard approach
494 // for most (all?) targets.
495 for (BasicBlock *BB : L->blocks())
496 for (Instruction &I : *BB)
499 if (isLoweredToCall(F))
500 return;
501
502 // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
503 // the various microarchitectures that use the BasicTTI implementation and
504 // has been selected through heuristics across multiple cores and runtimes.
505 UP.Partial = UP.Runtime = UP.UpperBound = true;
506 UP.PartialThreshold = 30;
507
508 // Avoid unrolling when optimizing for size.
509 UP.OptSizeThreshold = 0;
511
512 // Set number of instructions optimized when "back edge"
513 // becomes "fall through" to default value of 2.
514 UP.BEInsns = 2;
515}
516
518 return getST()->hasTailCall();
519}
520
523 using namespace llvm::PatternMatch;
524
525 if (!I->getType()->isVectorTy() || !I->isShift())
526 return false;
527
528 Value *V = I->getOperand(1);
529 // We don't need to sink a constant splat.
530 if (isa<Constant>(V))
531 return false;
532
534 m_Value(), m_ZeroMask()))) {
535 // Sink insert
536 Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
537 // Sink shuffle
538 Ops.push_back(&I->getOperandUse(1));
539 return true;
540 }
541
542 return false;
543}
544
545/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is
546/// constant.
549 bool IsRelaxed) {
550 auto *V = dyn_cast<Constant>(II.getArgOperand(1));
551 if (!V)
552 return nullptr;
553
554 auto *VecTy = cast<FixedVectorType>(II.getType());
555 unsigned NumElts = VecTy->getNumElements();
556 assert(NumElts == 16);
557
558 // Construct a shuffle mask from constant integers or UNDEFs.
559 int Indexes[16];
560 bool AnyOutOfBounds = false;
561
562 for (unsigned I = 0; I < NumElts; ++I) {
563 Constant *COp = V->getAggregateElement(I);
564 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
565 return nullptr;
566
567 if (isa<UndefValue>(COp)) {
568 Indexes[I] = -1;
569 continue;
570 }
571
572 if (IsRelaxed && cast<ConstantInt>(COp)->getSExtValue() >= NumElts) {
573 // The relaxed_swizzle operation always returns 0 if the lane index is
574 // less than 0 when interpreted as a signed value. For lane indices above
575 // 15, however, it can choose between returning 0 or the lane at `Index %
576 // 16`. However, the choice must be made consistently. As the WebAssembly
577 // spec states:
578 //
579 // "The result of relaxed operators are implementation-dependent, because
580 // the set of possible results may depend on properties of the host
581 // environment, such as its hardware. Technically, their behaviour is
582 // controlled by a set of global parameters to the semantics that an
583 // implementation can instantiate in different ways. These choices are
584 // fixed, that is, parameters are constant during the execution of any
585 // given program."
586 //
587 // The WebAssembly runtime may choose differently from us, so we can't
588 // optimize a relaxed swizzle with lane indices above 15.
589 return nullptr;
590 }
591
592 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
593 if (Index >= NumElts) {
594 AnyOutOfBounds = true;
595 // If there are out-of-bounds indices, the swizzle instruction returns
596 // zeroes in those lanes. We'll provide an all-zeroes vector as the
597 // second argument to shufflevector and read the first element from it.
598 Indexes[I] = NumElts;
599 continue;
600 }
601
602 Indexes[I] = Index;
603 }
604
605 auto *V1 = II.getArgOperand(0);
606 auto *V2 =
607 AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy);
608
609 return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
610}
611
612std::optional<Instruction *>
614 IntrinsicInst &II) const {
615 Intrinsic::ID IID = II.getIntrinsicID();
616 switch (IID) {
617 case Intrinsic::wasm_swizzle:
618 case Intrinsic::wasm_relaxed_swizzle:
619 if (Value *V = simplifyWasmSwizzle(
620 II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) {
621 return IC.replaceInstUsesWith(II, V);
622 }
623 break;
624 }
625
626 return std::nullopt;
627}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static const int MaxVecSize
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
static Value * simplifyWasmSwizzle(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsRelaxed)
Attempt to convert [relaxed_]swizzle to shufflevector if the mask is constant.
This file defines a TargetTransformInfoImplBase conforming object specific to the WebAssembly target machine.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
The core instruction combiner logic.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
BuilderTy & Builder
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
virtual unsigned getNumberOfRegisters(unsigned ClassID) const
virtual bool isLoweredToCall(const Function *F) const
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_Broadcast
Broadcast element 0 to all other elements.
CastContextHint
Represents a hint about the context in which a cast is used.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getNumberOfRegisters(unsigned ClassID) const override
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
Definition ISDOpcodes.h:24
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
bool match(Val *V, const Pattern &P)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
This is an optimization pass for GlobalISel generic memory operations.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
Definition CostTable.h:35
InstructionCost Cost
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
Definition CostTable.h:61
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
CostTblEntryT< unsigned > CostTblEntry
Definition CostTable.h:30
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
Definition CostTable.h:66
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).