LLVM 22.0.0git
InstCombineVectorOps.cpp
1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallBitVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/VectorUtils.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/Operator.h"
32#include "llvm/IR/PatternMatch.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/User.h"
35#include "llvm/IR/Value.h"
36#include "llvm/Support/Casting.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Transforms/InstCombine/InstCombiner.h"
39#include <cassert>
40#include <cstdint>
41#include <iterator>
42#include <utility>
43
44#define DEBUG_TYPE "instcombine"
45
46using namespace llvm;
47using namespace PatternMatch;
48
49STATISTIC(NumAggregateReconstructionsSimplified,
50 "Number of aggregate reconstructions turned into reuse of the "
51 "original aggregate");
52
53/// Return true if the value is cheaper to scalarize than it is to leave as a
54/// vector operation. If the extract index \p EI is a constant integer then
55/// some operations may be cheap to scalarize.
56///
57/// FIXME: It's possible to create more instructions than previously existed.
58static bool cheapToScalarize(Value *V, Value *EI) {
59 ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
60
61 // If we can pick a scalar constant value out of a vector, that is free.
62 if (auto *C = dyn_cast<Constant>(V))
63 return CEI || C->getSplatValue();
64
65 if (CEI && match(V, m_Intrinsic<Intrinsic::stepvector>())) {
66 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
67 // Index needs to be lower than the minimum size of the vector, because
68 // for scalable vector, the vector size is known at run time.
69 return CEI->getValue().ult(EC.getKnownMinValue());
70 }
71
72 // An insertelement to the same constant index as our extract will simplify
73 // to the scalar inserted element. An insertelement to a different constant
74 // index is irrelevant to our extract.
75 if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
76 return CEI;
77
78 if (match(V, m_OneUse(m_Load(m_Value()))))
79 return true;
80
81 if (match(V, m_OneUse(m_UnOp())))
82 return true;
83
84 Value *V0, *V1;
85 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
86 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
87 return true;
88
89 CmpPredicate UnusedPred;
90 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
91 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
92 return true;
93
94 return false;
95}
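// For illustration (hypothetical IR, not from this file): an extract of a
// splat constant or of a single-use load is cheap to scalarize, e.g.
//   %c = extractelement <4 x i32> <i32 7, i32 7, i32 7, i32 7>, i64 %i
//   %v = load <4 x i32>, ptr %p            ; single use
//   %e = extractelement <4 x i32> %v, i64 2
// whereas extracting from a multi-use vector binop is not.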
96
97// If we have a PHI node with a vector type that is only used to feed
98// itself and be an operand of extractelement at a constant location,
99// try to replace the PHI of the vector type with a PHI of a scalar type.
100Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101 PHINode *PN) {
102 SmallVector<Instruction *, 2> Extracts;
103 // The users we want the PHI to have are:
104 // 1) The EI ExtractElement (we already know this)
105 // 2) Possibly more ExtractElements with the same index.
106 // 3) Another operand, which will feed back into the PHI.
107 Instruction *PHIUser = nullptr;
108 for (auto *U : PN->users()) {
109 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
110 if (EI.getIndexOperand() == EU->getIndexOperand())
111 Extracts.push_back(EU);
112 else
113 return nullptr;
114 } else if (!PHIUser) {
115 PHIUser = cast<Instruction>(U);
116 } else {
117 return nullptr;
118 }
119 }
120
121 if (!PHIUser)
122 return nullptr;
123
124 // Verify that this PHI user has one use, which is the PHI itself,
125 // and that it is a binary operation which is cheap to scalarize.
126 // Otherwise return nullptr.
127 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
128 !(isa<BinaryOperator>(PHIUser)) ||
129 !cheapToScalarize(PHIUser, EI.getIndexOperand()))
130 return nullptr;
131
132 // Create a scalar PHI node that will replace the vector PHI node
133 // just before the current PHI node.
134 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
135 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), PN->getIterator()));
136 // Scalarize each PHI operand.
137 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
138 Value *PHIInVal = PN->getIncomingValue(i);
139 BasicBlock *inBB = PN->getIncomingBlock(i);
140 Value *Elt = EI.getIndexOperand();
141 // If the operand is the PHI induction variable:
142 if (PHIInVal == PHIUser) {
143 // Scalarize the binary operation. One operand is the
144 // scalar PHI, and the other is extracted from the other
145 // vector operand.
146 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
147 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
148 Value *Op = InsertNewInstWith(
149 ExtractElementInst::Create(B0->getOperand(opId), Elt,
150 B0->getOperand(opId)->getName() + ".Elt"),
151 B0->getIterator());
152 // Preserve operand order for binary operation to preserve semantics of
153 // non-commutative operations.
154 Value *FirstOp = (B0->getOperand(0) == PN) ? scalarPHI : Op;
155 Value *SecondOp = (B0->getOperand(0) == PN) ? Op : scalarPHI;
156 Value *newPHIUser =
157 InsertNewInstWith(BinaryOperator::CreateWithCopiedFlags(
158 B0->getOpcode(), FirstOp, SecondOp, B0),
159 B0->getIterator());
160 scalarPHI->addIncoming(newPHIUser, inBB);
161 } else {
162 // Scalarize PHI input:
163 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
164 // Insert the new instruction into the predecessor basic block.
165 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
166 BasicBlock::iterator InsertPos;
167 if (pos && !isa<PHINode>(pos)) {
168 InsertPos = ++pos->getIterator();
169 } else {
170 InsertPos = inBB->getFirstInsertionPt();
171 }
172
173 InsertNewInstWith(newEI, InsertPos);
174
175 scalarPHI->addIncoming(newEI, inBB);
176 }
177 }
178
179 for (auto *E : Extracts) {
180 replaceInstUsesWith(*E, scalarPHI);
181 // Add old extract to worklist for DCE.
182 addToWorklist(E);
183 }
184
185 return &EI;
186}
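// Illustrative sketch of the scalarizePHI rewrite above (hypothetical IR,
// assuming the fadd is cheap to scalarize, e.g. its other operand is a
// constant splat):
//   loop:
//     %vphi = phi <4 x float> [ %vinit, %entry ], [ %vnext, %loop ]
//     %vnext = fadd <4 x float> %vphi, %vstep
//     %s = extractelement <4 x float> %vphi, i64 0
// becomes a scalar float PHI feeding a scalar fadd whose other operand is
// extracted at index 0; the old extracts are then replaced by the scalar PHI.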
187
188Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
189 Value *X;
190 uint64_t ExtIndexC;
191 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
192 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
193 return nullptr;
194
195 ElementCount NumElts =
196 cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
197 Type *DestTy = Ext.getType();
198 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
199 bool IsBigEndian = DL.isBigEndian();
200
201 // If we are casting an integer to vector and extracting a portion, that is
202 // a shift-right and truncate.
203 if (X->getType()->isIntegerTy()) {
205 "Expected fixed vector type for bitcast from scalar integer");
206
207 // Big endian requires adjusting the extract index since MSB is at index 0.
208 // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
209 // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
210 if (IsBigEndian)
211 ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
212 unsigned ShiftAmountC = ExtIndexC * DestWidth;
213 if ((!ShiftAmountC ||
214 isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
215 Ext.getVectorOperand()->hasOneUse()) {
216 if (ShiftAmountC)
217 X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
218 if (DestTy->isFloatingPointTy()) {
219 Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
220 Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
221 return new BitCastInst(Trunc, DestTy);
222 }
223 return new TruncInst(X, DestTy);
224 }
225 }
226
227 if (!X->getType()->isVectorTy())
228 return nullptr;
229
230 // If this extractelement is using a bitcast from a vector of the same number
231 // of elements, see if we can find the source element from the source vector:
232 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
233 auto *SrcTy = cast<VectorType>(X->getType());
234 ElementCount NumSrcElts = SrcTy->getElementCount();
235 if (NumSrcElts == NumElts)
236 if (Value *Elt = findScalarElement(X, ExtIndexC))
237 return new BitCastInst(Elt, DestTy);
238
239 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
240 "Src and Dst must be the same sort of vector type");
241
242 // If the source elements are wider than the destination, try to shift and
243 // truncate a subset of scalar bits of an insert op.
244 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
245 Value *Scalar;
246 Value *Vec;
247 uint64_t InsIndexC;
248 if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
249 m_ConstantInt(InsIndexC))))
250 return nullptr;
251
252 // The extract must be from the subset of vector elements that we inserted
253 // into. Example: if we inserted element 1 of a <2 x i64> and we are
254 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
255 // of elements 4-7 of the bitcasted vector.
256 unsigned NarrowingRatio =
257 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
258
259 if (ExtIndexC / NarrowingRatio != InsIndexC) {
260 // Remove insertelement, if we don't use the inserted element.
261 // extractelement (bitcast (insertelement (Vec, b)), a) ->
262 // extractelement (bitcast (Vec), a)
263 // FIXME: this should be moved to SimplifyDemandedVectorElts,
264 // once scalable vectors are supported.
265 if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
266 Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
267 return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
268 }
269 return nullptr;
270 }
271
272 // We are extracting part of the original scalar. How that scalar is
273 // inserted into the vector depends on the endian-ness. Example:
274 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
275 // +--+--+--+--+--+--+--+--+
276 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
277 // extelt <4 x i16> V', 3: | |S2|S3|
278 // +--+--+--+--+--+--+--+--+
279 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
280 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
281 // In this example, we must right-shift little-endian. Big-endian is just a
282 // truncate.
283 unsigned Chunk = ExtIndexC % NarrowingRatio;
284 if (IsBigEndian)
285 Chunk = NarrowingRatio - 1 - Chunk;
286
287 // Bail out if this is an FP vector to FP vector sequence. That would take
288 // more instructions than we started with unless there is no shift, and it
289 // may not be handled as well in the backend.
290 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
291 bool NeedDestBitcast = DestTy->isFloatingPointTy();
292 if (NeedSrcBitcast && NeedDestBitcast)
293 return nullptr;
294
295 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
296 unsigned ShAmt = Chunk * DestWidth;
297
298 // TODO: This limitation is more strict than necessary. We could sum the
299 // number of new instructions and subtract the number eliminated to know if
300 // we can proceed.
301 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
302 if (NeedSrcBitcast || NeedDestBitcast)
303 return nullptr;
304
305 if (NeedSrcBitcast) {
306 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
307 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
308 }
309
310 if (ShAmt) {
311 // Bail out if we could end with more instructions than we started with.
312 if (!Ext.getVectorOperand()->hasOneUse())
313 return nullptr;
314 Scalar = Builder.CreateLShr(Scalar, ShAmt);
315 }
316
317 if (NeedDestBitcast) {
318 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
319 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
320 }
321 return new TruncInst(Scalar, DestTy);
322 }
323
324 return nullptr;
325}
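// Illustrative example of the narrowing case above (little-endian, names
// hypothetical):
//   %v  = insertelement <2 x i32> %vec, i32 %s, i64 1
//   %bc = bitcast <2 x i32> %v to <4 x i16>
//   %e  = extractelement <4 x i16> %bc, i64 3
// -->
//   %sh = lshr i32 %s, 16
//   %e  = trunc i32 %sh to i16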
326
327/// Find elements of V demanded by UserInstr. If this returns false, we were
328/// not able to determine all elements.
329static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
330 APInt &UnionUsedElts) {
331 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
332
333 switch (UserInstr->getOpcode()) {
334 case Instruction::ExtractElement: {
335 ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
336 assert(EEI->getVectorOperand() == V);
337 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
338 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
339 UnionUsedElts.setBit(EEIIndexC->getZExtValue());
340 return true;
341 }
342 break;
343 }
344 case Instruction::ShuffleVector: {
345 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
346 unsigned MaskNumElts =
347 cast<FixedVectorType>(UserInstr->getType())->getNumElements();
348
349 for (auto I : llvm::seq(MaskNumElts)) {
350 unsigned MaskVal = Shuffle->getMaskValue(I);
351 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
352 continue;
353 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
354 UnionUsedElts.setBit(MaskVal);
355 if (Shuffle->getOperand(1) == V &&
356 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
357 UnionUsedElts.setBit(MaskVal - VWidth);
358 }
359 return true;
360 }
361 default:
362 break;
363 }
364
365 return false;
366}
367
368/// Find union of elements of V demanded by all its users.
369/// If it is known by querying findDemandedEltsBySingleUser that
370/// no user demands an element of V, then the corresponding bit
371/// remains unset in the returned value.
372static APInt findDemandedEltsByAllUsers(Value *V) {
373 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
374
375 APInt UnionUsedElts(VWidth, 0);
376 for (const Use &U : V->uses()) {
377 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
378 if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
379 return APInt::getAllOnes(VWidth);
380 } else {
381 UnionUsedElts = APInt::getAllOnes(VWidth);
382 break;
383 }
384
385 if (UnionUsedElts.isAllOnes())
386 break;
387 }
388
389 return UnionUsedElts;
390}
391
392/// Given a constant index for an extractelement or insertelement instruction,
393/// return it with the canonical type if it isn't already canonical. We
394/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
395/// matter, we just want a consistent type to simplify CSE.
396static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
397 const unsigned IndexBW = IndexC->getBitWidth();
398 if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
399 return nullptr;
400 return ConstantInt::get(IndexC->getContext(),
401 IndexC->getValue().zextOrTrunc(64));
402}
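// Illustrative effect of the index canonicalization above (hypothetical IR):
//   extractelement <4 x float> %v, i32 1  -->  extractelement <4 x float> %v, i64 1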
403
404Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
405 Value *SrcVec = EI.getVectorOperand();
406 Value *Index = EI.getIndexOperand();
407 if (Value *V = simplifyExtractElementInst(SrcVec, Index,
408 SQ.getWithInstruction(&EI)))
409 return replaceInstUsesWith(EI, V);
410
411 // extractelt (select %x, %vec1, %vec2), %const ->
412 // select %x, %vec1[%const], %vec2[%const]
413 // TODO: Support constant folding of multiple select operands:
414 // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
415 // If the extractelement will for instance try to do out of bounds accesses
416 // because of the values of %c1 and/or %c2, the sequence could be optimized
417 // early. This is currently not possible because constant folding will reach
418 // an unreachable assertion if it doesn't find a constant operand.
419 if (auto *SI = dyn_cast<SelectInst>(SrcVec))
420 if (SI->getCondition()->getType()->isIntegerTy() &&
421 isGuaranteedNotToBePoison(SI->getCondition(), &AC))
422 if (Instruction *R = FoldOpIntoSelect(EI, SI))
423 return R;
424
425 // If extracting a specified index from the vector, see if we can recursively
426 // find a previously computed scalar that was inserted into the vector.
427 auto *IndexC = dyn_cast<ConstantInt>(Index);
428 bool HasKnownValidIndex = false;
429 if (IndexC) {
430 // Canonicalize type of constant indices to i64 to simplify CSE
431 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
432 return replaceOperand(EI, 1, NewIdx);
433
434 ElementCount EC = EI.getVectorOperandType()->getElementCount();
435 unsigned NumElts = EC.getKnownMinValue();
436 HasKnownValidIndex = IndexC->getValue().ult(NumElts);
437
438 if (auto *II = dyn_cast<IntrinsicInst>(SrcVec)) {
439 Intrinsic::ID IID = II->getIntrinsicID();
440 // Index needs to be lower than the minimum size of the vector, because
441 // for scalable vector, the vector size is known at run time.
442 if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
443 Type *Ty = EI.getType();
444 unsigned BitWidth = Ty->getIntegerBitWidth();
445 Value *Idx;
446 // Return index when its value does not exceed the allowed limit
447 // for the element type of the vector.
448 // TODO: Truncate out-of-range values.
449 if (IndexC->getValue().getActiveBits() <= BitWidth)
450 Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
451 else
452 return nullptr;
453 return replaceInstUsesWith(EI, Idx);
454 }
455 }
456
457 // InstSimplify should handle cases where the index is invalid.
458 // For fixed-length vector, it's invalid to extract out-of-range element.
459 if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
460 return nullptr;
461
462 if (Instruction *I = foldBitcastExtElt(EI))
463 return I;
464
465 // If there's a vector PHI feeding a scalar use through this extractelement
466 // instruction, try to scalarize the PHI.
467 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
468 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
469 return ScalarPHI;
470 }
471
472 // If SrcVec is a subvector starting at index 0, extract from the
473 // wider source vector
474 Value *V;
475 if (match(SrcVec,
476 m_Intrinsic<Intrinsic::vector_extract>(m_Value(V), m_Zero())))
477 return ExtractElementInst::Create(V, Index);
478
479 // TODO come up with a n-ary matcher that subsumes both unary and
480 // binary matchers.
481 UnaryOperator *UO;
482 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
483 // extelt (unop X), Index --> unop (extelt X, Index)
484 Value *X = UO->getOperand(0);
485 Value *E = Builder.CreateExtractElement(X, Index);
486 return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
487 }
488
489 // If the binop is not speculatable, we cannot hoist the extractelement if
490 // it may make the operand poison.
491 BinaryOperator *BO;
492 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
493 (HasKnownValidIndex ||
494 isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
495 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
496 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
497 Value *E0 = Builder.CreateExtractElement(X, Index);
498 Value *E1 = Builder.CreateExtractElement(Y, Index);
499 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
500 }
501
502 Value *X, *Y;
503 CmpPredicate Pred;
504 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
505 cheapToScalarize(SrcVec, Index)) {
506 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
507 Value *E0 = Builder.CreateExtractElement(X, Index);
508 Value *E1 = Builder.CreateExtractElement(Y, Index);
509 CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
510 return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
511 SrcCmpInst);
512 }
513
514 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
515 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
516 // InstSimplify already handled the case where the indices are constants
517 // and equal by value; if both are constants, they must not be the same
518 // value, so extract from the pre-inserted value instead.
519 if (isa<Constant>(IE->getOperand(2)) && IndexC)
520 return replaceOperand(EI, 0, IE->getOperand(0));
521 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
522 auto *VecType = cast<VectorType>(GEP->getType());
523 ElementCount EC = VecType->getElementCount();
524 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
525 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
526 // Find out why we have a vector result - these are a few examples:
527 // 1. We have a scalar pointer and a vector of indices, or
528 // 2. We have a vector of pointers and a scalar index, or
529 // 3. We have a vector of pointers and a vector of indices, etc.
530 // Here we only consider combining when there is exactly one vector
531 // operand, since the optimization is less obviously a win due to
532 // needing more than one extractelements.
533
534 unsigned VectorOps =
535 llvm::count_if(GEP->operands(), [](const Value *V) {
536 return isa<VectorType>(V->getType());
537 });
538 if (VectorOps == 1) {
539 Value *NewPtr = GEP->getPointerOperand();
540 if (isa<VectorType>(NewPtr->getType()))
541 NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
542
543 SmallVector<Value *> NewOps;
544 for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
545 Value *Op = GEP->getOperand(I);
546 if (isa<VectorType>(Op->getType()))
547 NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
548 else
549 NewOps.push_back(Op);
550 }
551
552 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
553 GEP->getSourceElementType(), NewPtr, NewOps);
554 NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
555 return NewGEP;
556 }
557 }
558 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
559 int SplatIndex = getSplatIndex(SVI->getShuffleMask());
560 // We know the all-0 splat must be reading from the first operand, even
561 // in the case of scalable vectors (vscale is always > 0).
562 if (SplatIndex == 0)
563 return ExtractElementInst::Create(SVI->getOperand(0),
564 Builder.getInt64(0));
565
566 if (isa<FixedVectorType>(SVI->getType())) {
567 std::optional<int> SrcIdx;
568 // getSplatIndex returns -1 to mean not-found.
569 if (SplatIndex != -1)
570 SrcIdx = SplatIndex;
571 else if (ConstantInt *CI = dyn_cast<ConstantInt>(Index))
572 SrcIdx = SVI->getMaskValue(CI->getZExtValue());
573
574 if (SrcIdx) {
575 Value *Src;
576 unsigned LHSWidth =
577 cast<FixedVectorType>(SVI->getOperand(0)->getType())
578 ->getNumElements();
579
580 if (*SrcIdx < 0)
581 return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
582 if (*SrcIdx < (int)LHSWidth)
583 Src = SVI->getOperand(0);
584 else {
585 *SrcIdx -= LHSWidth;
586 Src = SVI->getOperand(1);
587 }
588 Type *Int64Ty = Type::getInt64Ty(EI.getContext());
589 return ExtractElementInst::Create(
590 Src, ConstantInt::get(Int64Ty, *SrcIdx, false));
591 }
592 }
593 } else if (auto *CI = dyn_cast<CastInst>(I)) {
594 // Canonicalize extractelement(cast) -> cast(extractelement).
595 // Bitcasts can change the number of vector elements, and they cost
596 // nothing.
597 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
598 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
599 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
600 }
601 }
602 }
603
604 // Run demanded elements after other transforms as this can drop flags on
605 // binops. If there's two paths to the same final result, we prefer the
606 // one which doesn't force us to drop flags.
607 if (IndexC) {
608 ElementCount EC = EI.getVectorOperandType()->getElementCount();
609 unsigned NumElts = EC.getKnownMinValue();
610 // This instruction only demands the single element from the input vector.
611 // Skip for scalable type, the number of elements is unknown at
612 // compile-time.
613 if (!EC.isScalable() && NumElts != 1) {
614 // If the input vector has a single use, simplify it based on this use
615 // property.
616 if (SrcVec->hasOneUse()) {
617 APInt PoisonElts(NumElts, 0);
618 APInt DemandedElts(NumElts, 0);
619 DemandedElts.setBit(IndexC->getZExtValue());
620 if (Value *V =
621 SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
622 return replaceOperand(EI, 0, V);
623 } else {
624 // If the input vector has multiple uses, simplify it based on a union
625 // of all elements used.
626 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
627 if (!DemandedElts.isAllOnes()) {
628 APInt PoisonElts(NumElts, 0);
629 if (Value *V = SimplifyDemandedVectorElts(
630 SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
631 true /* AllowMultipleUsers */)) {
632 if (V != SrcVec) {
633 Worklist.addValue(SrcVec);
634 SrcVec->replaceAllUsesWith(V);
635 return &EI;
636 }
637 }
638 }
639 }
640 }
641 }
642 return nullptr;
643}
644
645/// If V is a shuffle of values that ONLY returns elements from either LHS or
646/// RHS, return the shuffle mask and true. Otherwise, return false.
647static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
648 SmallVectorImpl<int> &Mask) {
649 assert(LHS->getType() == RHS->getType() &&
650 "Invalid CollectSingleShuffleElements");
651 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
652
653 if (match(V, m_Poison())) {
654 Mask.assign(NumElts, -1);
655 return true;
656 }
657
658 if (V == LHS) {
659 for (unsigned i = 0; i != NumElts; ++i)
660 Mask.push_back(i);
661 return true;
662 }
663
664 if (V == RHS) {
665 for (unsigned i = 0; i != NumElts; ++i)
666 Mask.push_back(i + NumElts);
667 return true;
668 }
669
670 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
671 // If this is an insert of an extract from some other vector, include it.
672 Value *VecOp = IEI->getOperand(0);
673 Value *ScalarOp = IEI->getOperand(1);
674 Value *IdxOp = IEI->getOperand(2);
675
676 if (!isa<ConstantInt>(IdxOp))
677 return false;
678 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
679
680 if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
681 // We can handle this if the vector we are inserting into is
682 // transitively ok.
683 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
684 // If so, update the mask to reflect the inserted poison.
685 Mask[InsertedIdx] = -1;
686 return true;
687 }
688 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
689 if (isa<ConstantInt>(EI->getOperand(1))) {
690 unsigned ExtractedIdx =
691 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
692 unsigned NumLHSElts =
693 cast<FixedVectorType>(LHS->getType())->getNumElements();
694
695 // This must be extracting from either LHS or RHS.
696 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
697 // We can handle this if the vector we are inserting into is
698 // transitively ok.
699 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
700 // If so, update the mask to reflect the inserted value.
701 if (EI->getOperand(0) == LHS) {
702 Mask[InsertedIdx % NumElts] = ExtractedIdx;
703 } else {
704 assert(EI->getOperand(0) == RHS);
705 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
706 }
707 return true;
708 }
709 }
710 }
711 }
712 }
713
714 return false;
715}
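// Illustrative example for the collection above (hypothetical IR): with
// LHS = %a and RHS = %b, both <4 x i32>, the chain
//   %e0 = extractelement <4 x i32> %a, i64 0
//   %e1 = extractelement <4 x i32> %b, i64 2
//   %i0 = insertelement <4 x i32> poison, i32 %e0, i64 0
//   %i1 = insertelement <4 x i32> %i0, i32 %e1, i64 1
// yields Mask = <0, 6, -1, -1>, since RHS elements are offset by NumElts.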
716
717/// If we have insertion into a vector that is wider than the vector that we
718/// are extracting from, try to widen the source vector to allow a single
719/// shufflevector to replace one or more insert/extract pairs.
720static bool replaceExtractElements(InsertElementInst *InsElt,
721 ExtractElementInst *ExtElt,
722 InstCombinerImpl &IC) {
723 auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
724 auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
725 unsigned NumInsElts = InsVecType->getNumElements();
726 unsigned NumExtElts = ExtVecType->getNumElements();
727
728 // The inserted-to vector must be wider than the extracted-from vector.
729 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
730 NumExtElts >= NumInsElts)
731 return false;
732
733 Value *ExtVecOp = ExtElt->getVectorOperand();
734 // Bail out on constant vectors.
735 if (isa<ConstantData>(ExtVecOp))
736 return false;
737
738 // Create a shuffle mask to widen the extended-from vector using poison
739 // values. The mask selects all of the values of the original vector followed
740 // by as many poison values as needed to create a vector of the same length
741 // as the inserted-to vector.
742 SmallVector<int, 16> ExtendMask;
743 for (unsigned i = 0; i < NumExtElts; ++i)
744 ExtendMask.push_back(i);
745 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
746 ExtendMask.push_back(-1);
747
748 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
749 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
750 ? ExtVecOpInst->getParent()
751 : ExtElt->getParent();
752
753 // TODO: This restriction matches the basic block check below when creating
754 // new extractelement instructions. If that limitation is removed, this one
755 // could also be removed. But for now, we just bail out to ensure that we
756 // will replace the extractelement instruction that is feeding our
757 // insertelement instruction. This allows the insertelement to then be
758 // replaced by a shufflevector. If the insertelement is not replaced, we can
759 // induce infinite looping because there's an optimization for extractelement
760 // that will delete our widening shuffle. This would trigger another attempt
761 // here to create that shuffle, and we spin forever.
762 if (InsertionBlock != InsElt->getParent())
763 return false;
764
765 // TODO: This restriction matches the check in visitInsertElementInst() and
766 // prevents an infinite loop caused by not turning the extract/insert pair
767 // into a shuffle. We really should not need either check, but we're lacking
768 // folds for shufflevectors because we're afraid to generate shuffle masks
769 // that the backend can't handle.
770 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
771 return false;
772
773 auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
774
775 // Insert the new shuffle after the vector operand of the extract is defined
776 // (as long as it's not a PHI) or at the start of the basic block of the
777 // extract, so any subsequent extracts in the same basic block can use it.
778 // TODO: Insert before the earliest ExtractElementInst that is replaced.
779 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
780 WideVec->insertAfter(ExtVecOpInst->getIterator());
781 else
782 IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
783
784 // Replace extracts from the original narrow vector with extracts from the new
785 // wide vector.
786 for (User *U : ExtVecOp->users()) {
787 auto *OldExt = dyn_cast<ExtractElementInst>(U);
788 if (!OldExt || OldExt->getParent() != WideVec->getParent())
789 continue;
790 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
791 IC.InsertNewInstWith(NewExt, OldExt->getIterator());
792 IC.replaceInstUsesWith(*OldExt, NewExt);
793 // Add the old extracts to the worklist for DCE. We can't remove the
794 // extracts directly, because they may still be used by the calling code.
795 IC.addToWorklist(OldExt);
796 }
797
798 return true;
799}
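// Illustrative example for the widening above (hypothetical IR): with
//   %e = extractelement <2 x i32> %small, i64 0
//   %i = insertelement <4 x i32> %big, i32 %e, i64 0
// the narrow source is widened with a poison-padded shuffle so that a single
// shufflevector can later replace the insert/extract pair:
//   %wide = shufflevector <2 x i32> %small, <2 x i32> poison,
//                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %e.new = extractelement <4 x i32> %wide, i64 0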
800
801/// We are building a shuffle to create V, which is a sequence of insertelement,
802/// extractelement pairs. If PermittedRHS is set, then we must either use it or
803/// not rely on the second vector source. Return a std::pair containing the
804/// left and right vectors of the proposed shuffle (or 0), and set the Mask
805/// parameter as required.
806///
807/// Note: we intentionally don't try to fold earlier shuffles since they have
808/// often been chosen carefully to be efficiently implementable on the target.
809using ShuffleOps = std::pair<Value *, Value *>;
810
811static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
812 Value *PermittedRHS,
813 InstCombinerImpl &IC, bool &Rerun) {
814 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
815 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
816
817 if (match(V, m_Poison())) {
818 Mask.assign(NumElts, -1);
819 return std::make_pair(
820 PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
821 }
822
823 if (isa<ConstantAggregateZero>(V)) {
824 Mask.assign(NumElts, 0);
825 return std::make_pair(V, nullptr);
826 }
827
828 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
829 // If this is an insert of an extract from some other vector, include it.
830 Value *VecOp = IEI->getOperand(0);
831 Value *ScalarOp = IEI->getOperand(1);
832 Value *IdxOp = IEI->getOperand(2);
833
834 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp))
835 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
836 unsigned ExtractedIdx =
837 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
838 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
839
840 // Either the extracted from or inserted into vector must be RHSVec,
841 // otherwise we'd end up with a shuffle of three inputs.
842 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
843 Value *RHS = EI->getOperand(0);
844 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
845 assert(LR.second == nullptr || LR.second == RHS);
846
847 if (LR.first->getType() != RHS->getType()) {
848 // Although we are giving up for now, see if we can create extracts
849 // that match the inserts for another round of combining.
850 if (replaceExtractElements(IEI, EI, IC))
851 Rerun = true;
852
853 // We tried our best, but we can't find anything compatible with RHS
854 // further up the chain. Return a trivial shuffle.
855 for (unsigned i = 0; i < NumElts; ++i)
856 Mask[i] = i;
857 return std::make_pair(V, nullptr);
858 }
859
860 unsigned NumLHSElts =
861 cast<FixedVectorType>(RHS->getType())->getNumElements();
862 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
863 return std::make_pair(LR.first, RHS);
864 }
865
866 if (VecOp == PermittedRHS) {
867 // We've gone as far as we can: anything on the other side of the
868 // extractelement will already have been converted into a shuffle.
869 unsigned NumLHSElts =
870 cast<FixedVectorType>(EI->getOperand(0)->getType())
871 ->getNumElements();
872 for (unsigned i = 0; i != NumElts; ++i)
873 Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
874 return std::make_pair(EI->getOperand(0), PermittedRHS);
875 }
876
877 // If this insertelement is a chain that comes from exactly these two
878 // vectors, return the vector and the effective shuffle.
879 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
880 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
881 Mask))
882 return std::make_pair(EI->getOperand(0), PermittedRHS);
883 }
884 }
885 }
886
887 // Otherwise, we can't do anything fancy. Return an identity vector.
888 for (unsigned i = 0; i != NumElts; ++i)
889 Mask.push_back(i);
890 return std::make_pair(V, nullptr);
891}
892
893/// Look for a chain of insertvalue's that fully define an aggregate, and trace
894/// back the values inserted; see if they were all extractvalue'd from
895/// the same source aggregate from the exact same element indexes.
896/// If they were, just reuse the source aggregate.
897/// This potentially deals with PHI indirections.
898Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
899 InsertValueInst &OrigIVI) {
900 Type *AggTy = OrigIVI.getType();
901 unsigned NumAggElts;
902 switch (AggTy->getTypeID()) {
903 case Type::StructTyID:
904 NumAggElts = AggTy->getStructNumElements();
905 break;
906 case Type::ArrayTyID:
907 NumAggElts = AggTy->getArrayNumElements();
908 break;
909 default:
910 llvm_unreachable("Unhandled aggregate type?");
911 }
912
913 // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
914 // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}).
915 // FIXME: any interesting patterns to be caught with larger limit?
916 assert(NumAggElts > 0 && "Aggregate should have elements.");
917 if (NumAggElts > 2)
918 return nullptr;
919
920 static constexpr auto NotFound = std::nullopt;
921 static constexpr auto FoundMismatch = nullptr;
922
923 // Try to find a value of each element of an aggregate.
924 // FIXME: deal with more complex, not one-dimensional, aggregate types
925 SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
926
927 // Do we know values for each element of the aggregate?
928 auto KnowAllElts = [&AggElts]() {
929 return !llvm::is_contained(AggElts, NotFound);
930 };
931
932 int Depth = 0;
933
934 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
935 // every element being overwritten twice, which should never happen.
936 static const int DepthLimit = 2 * NumAggElts;
937
938 // Recurse up the chain of `insertvalue` aggregate operands until either we've
939 // reconstructed the full initializer or can't visit any more `insertvalue`'s.
940 for (InsertValueInst *CurrIVI = &OrigIVI;
941 Depth < DepthLimit && CurrIVI && !KnowAllElts();
942 CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
943 ++Depth) {
944 auto *InsertedValue =
945 dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
946 if (!InsertedValue)
947 return nullptr; // Inserted value must be produced by an instruction.
948
949 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
950
951 // Don't bother with more than single-level aggregates.
952 if (Indices.size() != 1)
953 return nullptr; // FIXME: deal with more complex aggregates?
954
955 // Now, we may have already previously recorded the value for this element
956 // of an aggregate. If we did, that means the CurrIVI will later be
957 // overwritten with the already-recorded value. But if not, let's record it!
958 std::optional<Instruction *> &Elt = AggElts[Indices.front()];
959 Elt = Elt.value_or(InsertedValue);
960
961 // FIXME: should we handle chain-terminating undef base operand?
962 }
963
964 // Was that sufficient to deduce the full initializer for the aggregate?
965 if (!KnowAllElts())
966 return nullptr; // Give up then.
967
968 // We now want to find the source[s] of the aggregate elements we've found.
969 // And with "source" we mean the original aggregate[s] from which
970 // the inserted elements were extracted. This may require PHI translation.
971
972 enum class AggregateDescription {
973 /// When analyzing the value that was inserted into an aggregate, we did
974 /// not manage to find defining `extractvalue` instruction to analyze.
975 NotFound,
976 /// When analyzing the value that was inserted into an aggregate, we did
977 /// manage to find defining `extractvalue` instruction[s], and everything
978 /// matched perfectly - aggregate type, element insertion/extraction index.
979 Found,
980 /// When analyzing the value that was inserted into an aggregate, we did
981 /// manage to find defining `extractvalue` instruction, but there was
982 /// a mismatch: either the source type from which the extraction was didn't
983 /// match the aggregate type into which the insertion was,
984 /// or the extraction/insertion channels mismatched,
985 /// or different elements had different source aggregates.
986 FoundMismatch
987 };
988 auto Describe = [](std::optional<Value *> SourceAggregate) {
989 if (SourceAggregate == NotFound)
990 return AggregateDescription::NotFound;
991 if (*SourceAggregate == FoundMismatch)
992 return AggregateDescription::FoundMismatch;
993 return AggregateDescription::Found;
994 };
995
996 // If an aggregate element is defined in UseBB, we can't use it in PredBB.
997 bool EltDefinedInUseBB = false;
998
999 // Given the value \p Elt that was being inserted into element \p EltIdx of an
1000 // aggregate AggTy, see if \p Elt was originally defined by an
1001 // appropriate extractvalue (same element index, same aggregate type).
1002 // If found, return the source aggregate from which the extraction was.
1003 // If \p PredBB is provided, does PHI translation of an \p Elt first.
1004 auto FindSourceAggregate =
1005 [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
1006 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1007 // For now(?), only deal with, at most, a single level of PHI indirection.
1008 if (UseBB && PredBB) {
1009 Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
1010 if (Elt && Elt->getParent() == *UseBB)
1011 EltDefinedInUseBB = true;
1012 }
1013 // FIXME: deal with multiple levels of PHI indirection?
1014
1015 // Did we find an extraction?
1016 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
1017 if (!EVI)
1018 return NotFound;
1019
1020 Value *SourceAggregate = EVI->getAggregateOperand();
1021
1022 // Is the extraction from the same type into which the insertion was?
1023 if (SourceAggregate->getType() != AggTy)
1024 return FoundMismatch;
1025 // And the element index doesn't change between extraction and insertion?
1026 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
1027 return FoundMismatch;
1028
1029 return SourceAggregate; // AggregateDescription::Found
1030 };
1031
1032 // Given elements AggElts that were constructing an aggregate OrigIVI,
1033 // see if we can find appropriate source aggregate for each of the elements,
1034 // and check that it's the same aggregate for each element. If so, return it.
1035 auto FindCommonSourceAggregate =
1036 [&](std::optional<BasicBlock *> UseBB,
1037 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1038 std::optional<Value *> SourceAggregate;
1039
1040 for (auto I : enumerate(AggElts)) {
1041 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1042 "We don't store nullptr in SourceAggregate!");
1043 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1044 (I.index() != 0) &&
1045 "SourceAggregate should be valid after the first element,");
1046
1047 // For this element, is there a plausible source aggregate?
1048 // FIXME: we could special-case undef element, IFF we know that in the
1049 // source aggregate said element isn't poison.
1050 std::optional<Value *> SourceAggregateForElement =
1051 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1052
1053 // Okay, what have we found? Does that correlate with previous findings?
1054
1055 // Regardless of whether or not we have previously found source
1056 // aggregate for previous elements (if any), if we didn't find one for
1057 // this element, passthrough whatever we have just found.
1058 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1059 return SourceAggregateForElement;
1060
1061 // Okay, we have found source aggregate for this element.
1062 // Let's see what we already know from previous elements, if any.
1063 switch (Describe(SourceAggregate)) {
1064 case AggregateDescription::NotFound:
1065 // This is apparently the first element that we have examined.
1066 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1067 continue; // Great, now look at next element.
1068 case AggregateDescription::Found:
1069 // We have previously already successfully examined other elements.
1070 // Is this the same source aggregate we've found for other elements?
1071 if (*SourceAggregateForElement != *SourceAggregate)
1072 return FoundMismatch;
1073 continue; // Still the same aggregate, look at next element.
1074 case AggregateDescription::FoundMismatch:
1075 llvm_unreachable("Can't happen. We would have early-exited then.");
1076 };
1077 }
1078
1079 assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1080 "Must be a valid Value");
1081 return *SourceAggregate;
1082 };
1083
1084 std::optional<Value *> SourceAggregate;
1085
1086 // Can we find the source aggregate without looking at predecessors?
1087 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
1088 /*PredBB=*/std::nullopt);
1089 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1090 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1091 return nullptr; // Conflicting source aggregates!
1092 ++NumAggregateReconstructionsSimplified;
1093 return replaceInstUsesWith(OrigIVI, *SourceAggregate);
1094 }
1095
1096 // Okay, apparently we need to look at predecessors.
1097
1098 // We should be smart about picking the "use" basic block, which will be the
1099 // merge point for aggregate, where we'll insert the final PHI that will be
1100 // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
1101 // We should look in which blocks each of the AggElts is being defined,
1102 // they all should be defined in the same basic block.
1103 BasicBlock *UseBB = nullptr;
1104
1105 for (const std::optional<Instruction *> &I : AggElts) {
1106 BasicBlock *BB = (*I)->getParent();
1107 // If it's the first instruction we've encountered, record the basic block.
1108 if (!UseBB) {
1109 UseBB = BB;
1110 continue;
1111 }
1112 // Otherwise, this must be the same basic block we've seen previously.
1113 if (UseBB != BB)
1114 return nullptr;
1115 }
1116
1117 // If *all* of the elements are basic-block-independent, meaning they are
1118 // either function arguments, or constant expressions, then if we didn't
1119 // handle them without predecessor-aware handling, we won't handle them now.
1120 if (!UseBB)
1121 return nullptr;
1122
1123 // If we didn't manage to find source aggregate without looking at
1124 // predecessors, and there are no predecessors to look at, then we're done.
1125 if (pred_empty(UseBB))
1126 return nullptr;
1127
1128 // Arbitrary predecessor count limit.
1129 static const int PredCountLimit = 64;
1130
1131 // Cache the (non-uniqified!) list of predecessors in a vector,
1132 // checking the limit at the same time for efficiency.
1133 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1134 for (BasicBlock *Pred : predecessors(UseBB)) {
1135 // Don't bother if there are too many predecessors.
1136 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1137 return nullptr;
1138 Preds.emplace_back(Pred);
1139 }
1140
1141 // For each predecessor, what is the source aggregate,
1142 // from which all the elements were originally extracted from?
1143 // Note that we want the map to have stable iteration order!
1144 SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates;
1145 bool FoundSrcAgg = false;
1146 for (BasicBlock *Pred : Preds) {
1147 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1148 SourceAggregates.try_emplace(Pred);
1149 // Did we already evaluate this predecessor?
1150 if (!IV.second)
1151 continue;
1152
1153 // Let's hope that when coming from predecessor Pred, all elements of the
1154 // aggregate produced by OrigIVI must have been originally extracted from
1155 // the same aggregate. Is that so? Can we find said original aggregate?
1156 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1157 if (Describe(SourceAggregate) == AggregateDescription::Found) {
1158 FoundSrcAgg = true;
1159 IV.first->second = *SourceAggregate;
1160 } else {
1161 // If UseBB is the single successor of Pred, we can add InsertValue to
1162 // Pred.
1163 auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1164 if (!BI || !BI->isUnconditional())
1165 return nullptr;
1166 }
1167 }
1168
1169 if (!FoundSrcAgg)
1170 return nullptr;
1171
1172 // Do some sanity check if we need to add insertvalue into predecessors.
1173 auto OrigBB = OrigIVI.getParent();
1174 for (auto &It : SourceAggregates) {
1175 if (Describe(It.second) == AggregateDescription::Found)
1176 continue;
1177
1178 // Element is defined in UseBB, so it can't be used in predecessors.
1179 if (EltDefinedInUseBB)
1180 return nullptr;
1181
1182 // Doing this transformation across a loop boundary may create a dead loop,
1183 // so we should avoid that situation. But LoopInfo is not generally
1184 // available; we must be conservative here.
1185 // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
1186 // can't be in inner loop.
1187 if (UseBB != OrigBB)
1188 return nullptr;
1189
1190 // Avoid constructing constant aggregate because constant value may expose
1191 // more optimizations.
1192 bool ConstAgg = true;
1193 for (auto Val : AggElts) {
1194 Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
1195 if (!isa<Constant>(Elt)) {
1196 ConstAgg = false;
1197 break;
1198 }
1199 }
1200 if (ConstAgg)
1201 return nullptr;
1202 }
1203
1204 // For predecessors without appropriate source aggregate, create one in the
1205 // predecessor.
1206 for (auto &It : SourceAggregates) {
1207 if (Describe(It.second) == AggregateDescription::Found)
1208 continue;
1209
1210 BasicBlock *Pred = It.first;
1211 Builder.SetInsertPoint(Pred->getTerminator());
1212 Value *V = PoisonValue::get(AggTy);
1213 for (auto [Idx, Val] : enumerate(AggElts)) {
1214 Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
1215 V = Builder.CreateInsertValue(V, Elt, Idx);
1216 }
1217
1218 It.second = V;
1219 }
1220
1221 // All good! Now we just need to thread the source aggregates here.
1222 // Note that we have to insert the new PHI here, ourselves, because we can't
1223 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1224 // Note that the same block can be a predecessor more than once,
1225 // and we need to preserve that invariant for the PHI node.
1226 BuilderTy::InsertPointGuard Guard(Builder);
1227 Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
1228 auto *PHI =
1229 Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1230 for (BasicBlock *Pred : Preds)
1231 PHI->addIncoming(SourceAggregates[Pred], Pred);
1232
1233 ++NumAggregateReconstructionsSimplified;
1234 return replaceInstUsesWith(OrigIVI, PHI);
1235}
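// Illustrative example of the single-block reuse handled above (hypothetical
// IR):
//   %e0 = extractvalue { i8, i32 } %agg, 0
//   %e1 = extractvalue { i8, i32 } %agg, 1
//   %i0 = insertvalue { i8, i32 } poison, i8 %e0, 0
//   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
// is replaced by %agg itself; the cross-block case instead threads the source
// aggregates through a PHI in UseBB.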
1236
1237/// Try to find redundant insertvalue instructions, like the following ones:
1238/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1239/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1240/// Here the second instruction inserts values at the same indices as the
1241/// first one, making the first one redundant.
1242/// It should be transformed to:
1243/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1244Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1245 if (Value *V = simplifyInsertValueInst(
1246 I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
1247 SQ.getWithInstruction(&I)))
1248 return replaceInstUsesWith(I, V);
1249
1250 bool IsRedundant = false;
1251 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1252
1253 // If there is a chain of insertvalue instructions (each of them except the
1254 // last one has only one use and it's another insertvalue insn from this
1255 // chain), check if any of the 'children' uses the same indices as the first
1256 // instruction. In this case, the first one is redundant.
1257 Value *V = &I;
1258 unsigned Depth = 0;
1259 while (V->hasOneUse() && Depth < 10) {
1260 User *U = V->user_back();
1261 auto UserInsInst = dyn_cast<InsertValueInst>(U);
1262 if (!UserInsInst || U->getOperand(0) != V)
1263 break;
1264 if (UserInsInst->getIndices() == FirstIndices) {
1265 IsRedundant = true;
1266 break;
1267 }
1268 V = UserInsInst;
1269 Depth++;
1270 }
1271
1272 if (IsRedundant)
1273 return replaceInstUsesWith(I, I.getOperand(0));
1274
1275 if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
1276 return NewI;
1277
1278 return nullptr;
1279}
1280
1281static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1282 // Cannot analyze a scalable type; the number of elements is not a
1283 // compile-time constant.
1284 if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
1285 return false;
1286
1287 int MaskSize = Shuf.getShuffleMask().size();
1288 int VecSize =
1289 cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1290
1291 // A vector select does not change the size of the operands.
1292 if (MaskSize != VecSize)
1293 return false;
1294
1295 // Each mask element must be undefined or choose a vector element from one of
1296 // the source operands without crossing vector lanes.
1297 for (int i = 0; i != MaskSize; ++i) {
1298 int Elt = Shuf.getMaskValue(i);
1299 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1300 return false;
1301 }
1302
1303 return true;
1304}
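// Illustrative example for the check above: with two <4 x i32> operands, the
// mask <0, 5, -1, 3> is select-equivalent (each lane i picks element i of one
// operand), while <1, 5, 2, 3> is not, because lane 0 reads element 1.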
1305
1306/// Turn a chain of inserts that splats a value into an insert + shuffle:
1307/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1308/// shufflevector(insertelt(X, %k, 0), poison, zero)
1309static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1310 // We are interested in the last insert in a chain. So if this insert has a
1311 // single user and that user is an insert, bail.
1312 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1313 return nullptr;
1314
1315 VectorType *VecTy = InsElt.getType();
1316 // Cannot handle a scalable type; the number of elements is not a
1317 // compile-time constant.
1318 if (isa<ScalableVectorType>(VecTy))
1319 return nullptr;
1320 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1321
1322 // Do not try to do this for a one-element vector, since that's a nop,
1323 // and will cause an inf-loop.
1324 if (NumElements == 1)
1325 return nullptr;
1326
1327 Value *SplatVal = InsElt.getOperand(1);
1328 InsertElementInst *CurrIE = &InsElt;
1329 SmallBitVector ElementPresent(NumElements, false);
1330 InsertElementInst *FirstIE = nullptr;
1331
1332 // Walk the chain backwards, keeping track of which indices we inserted into,
1333 // until we hit something that isn't an insert of the splatted value.
1334 while (CurrIE) {
1335 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1336 if (!Idx || CurrIE->getOperand(1) != SplatVal)
1337 return nullptr;
1338
1339 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1340 // Check none of the intermediate steps have any additional uses, except
1341 // for the root insertelement instruction, which can be re-used, if it
1342 // inserts at position 0.
1343 if (CurrIE != &InsElt &&
1344 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1345 return nullptr;
1346
1347 ElementPresent[Idx->getZExtValue()] = true;
1348 FirstIE = CurrIE;
1349 CurrIE = NextIE;
1350 }
1351
1352 // If this is just a single insertelement (not a sequence), we are done.
1353 if (FirstIE == &InsElt)
1354 return nullptr;
1355
1356 // If we are not inserting into a poison vector, make sure we've seen an
1357 // insert into every element.
1358 // TODO: If the base vector is not undef, it might be better to create a splat
1359 // and then a select-shuffle (blend) with the base vector.
1360 if (!match(FirstIE->getOperand(0), m_Poison()))
1361 if (!ElementPresent.all())
1362 return nullptr;
1363
1364 // Create the insert + shuffle.
1365 Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
1366 PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1367 Constant *Zero = ConstantInt::get(Int64Ty, 0);
1368 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1369 FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
1370 InsElt.getIterator());
1371
1372 // Splat from element 0, but replace absent elements with poison in the mask.
1373 SmallVector<int, 16> Mask(NumElements, 0);
1374 for (unsigned i = 0; i != NumElements; ++i)
1375 if (!ElementPresent[i])
1376 Mask[i] = -1;
1377
1378 return new ShuffleVectorInst(FirstIE, Mask);
1379}
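// Illustrative IR for the splat fold above (hypothetical values):
//   %i0 = insertelement <4 x i32> poison, i32 %k, i64 0
//   %i1 = insertelement <4 x i32> %i0, i32 %k, i64 1
//   %i2 = insertelement <4 x i32> %i1, i32 %k, i64 2
//   %i3 = insertelement <4 x i32> %i2, i32 %k, i64 3
// -->
//   %i0 = insertelement <4 x i32> poison, i32 %k, i64 0
//   %s  = shufflevector <4 x i32> %i0, <4 x i32> poison, <4 x i32> zeroinitializer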
1380
1381/// Try to fold an insert element into an existing splat shuffle by changing
1382/// the shuffle's mask to include the index of this insert element.
1383static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1384 // Check if the vector operand of this insert is a canonical splat shuffle.
1385 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1386 if (!Shuf || !Shuf->isZeroEltSplat())
1387 return nullptr;
1388
1389 // Bail out early if shuffle is scalable type. The number of elements in
1390 // shuffle mask is unknown at compile-time.
1391 if (isa<ScalableVectorType>(Shuf->getType()))
1392 return nullptr;
1393
1394 // Check for a constant insertion index.
1395 uint64_t IdxC;
1396 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1397 return nullptr;
1398
1399 // Check if the splat shuffle's input is the same as this insert's scalar op.
1400 Value *X = InsElt.getOperand(1);
1401 Value *Op0 = Shuf->getOperand(0);
1402 if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1403 return nullptr;
1404
1405 // Replace the shuffle mask element at the index of this insert with a zero.
1406 // For example:
1407 // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1408 // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1409 unsigned NumMaskElts =
1410 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1411 SmallVector<int, 16> NewMask(NumMaskElts);
1412 for (unsigned i = 0; i != NumMaskElts; ++i)
1413 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1414
1415 return new ShuffleVectorInst(Op0, NewMask);
1416}
1417
1418/// Try to fold an extract+insert element into an existing identity shuffle by
1419/// changing the shuffle's mask to include the index of this insert element.
1420static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1421 // Check if the vector operand of this insert is an identity shuffle.
1422 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1423 if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
1424 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1425 return nullptr;
1426
1427 // Bail out early if shuffle is scalable type. The number of elements in
1428 // shuffle mask is unknown at compile-time.
1429 if (isa<ScalableVectorType>(Shuf->getType()))
1430 return nullptr;
1431
1432 // Check for a constant insertion index.
1433 uint64_t IdxC;
1434 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1435 return nullptr;
1436
1437 // Check if this insert's scalar op is extracted from the identity shuffle's
1438 // input vector.
1439 Value *Scalar = InsElt.getOperand(1);
1440 Value *X = Shuf->getOperand(0);
1441 if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1442 return nullptr;
1443
1444 // Replace the shuffle mask element at the index of this extract+insert with
1445 // that same index value.
1446 // For example:
1447 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1448 unsigned NumMaskElts =
1449 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1450 SmallVector<int, 16> NewMask(NumMaskElts);
1451 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1452 for (unsigned i = 0; i != NumMaskElts; ++i) {
1453 if (i != IdxC) {
1454 // All mask elements besides the inserted element remain the same.
1455 NewMask[i] = OldMask[i];
1456 } else if (OldMask[i] == (int)IdxC) {
1457 // If the mask element was already set, there's nothing to do
1458 // (demanded elements analysis may unset it later).
1459 return nullptr;
1460 } else {
1461 assert(OldMask[i] == PoisonMaskElem &&
1462 "Unexpected shuffle mask element for identity shuffle");
1463 NewMask[i] = IdxC;
1464 }
1465 }
1466
1467 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1468}
1469
1470/// If we have an insertelement instruction feeding into another insertelement
1471/// and the 2nd is inserting a constant into the vector, canonicalize that
1472/// constant insertion before the insertion of a variable:
1473///
1474/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1475/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1476///
1477/// This has the potential of eliminating the 2nd insertelement instruction
1478/// via constant folding of the scalar constant into a vector constant.
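// Illustrative example (values and types chosen for exposition, not from the
// original source):
//   %t = insertelement <4 x i32> %X, i32 %y, i64 0
//   %r = insertelement <4 x i32> %t, i32 7, i64 2
// -->
//   %c = insertelement <4 x i32> %X, i32 7, i64 2
//   %r = insertelement <4 x i32> %c, i32 %y, i64 0
// If %X is a constant vector, the inner insert now folds to a constant.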
1479static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1480 InstCombiner::BuilderTy &Builder) {
1481 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1482 if (!InsElt1 || !InsElt1->hasOneUse())
1483 return nullptr;
1484
1485 Value *X, *Y;
1486 Constant *ScalarC;
1487 ConstantInt *IdxC1, *IdxC2;
1488 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1489 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1490 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1491 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1492 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1493 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1494 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1495 }
1496
1497 return nullptr;
1498}
1499
1500/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1501/// --> shufflevector X, CVec', Mask'
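// Illustrative example of the shuffle case (constants chosen for exposition,
// not from the original source):
//   %s = shufflevector <4 x i32> %X, <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %r = insertelement <4 x i32> %s, i32 42, i64 2
// -->
//   %r = shufflevector <4 x i32> %X, <4 x i32> <i32 0, i32 1, i32 42, i32 3>,
//                      <4 x i32> <i32 0, i32 5, i32 6, i32 7>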
1502static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1503 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1504 // Bail out if the parent has more than one use. In that case, we'd be
1505 // replacing the insertelt with a shuffle, and that's not a clear win.
1506 if (!Inst || !Inst->hasOneUse())
1507 return nullptr;
1508 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1509 // The shuffle must have a constant vector operand. The insertelt must have
1510 // a constant scalar being inserted at a constant position in the vector.
1511 Constant *ShufConstVec, *InsEltScalar;
1512 uint64_t InsEltIndex;
1513 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1514 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1515 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1516 return nullptr;
1517
1518 // Adding an element to an arbitrary shuffle could be expensive, but a
1519 // shuffle that selects elements from vectors without crossing lanes is
1520 // assumed cheap.
1521 // If we're just adding a constant into that shuffle, it will still be
1522 // cheap.
1523 if (!isShuffleEquivalentToSelect(*Shuf))
1524 return nullptr;
1525
1526 // From the above 'select' check, we know that the mask has the same number
1527 // of elements as the vector input operands. We also know that each constant
1528 // input element is used in its lane and can not be used more than once by
1529 // the shuffle. Therefore, replace the constant in the shuffle's constant
1530 // vector with the insertelt constant. Replace the constant in the shuffle's
1531 // mask vector with the insertelt index plus the length of the vector
1532 // (because the constant vector operand of a shuffle is always the 2nd
1533 // operand).
1534 ArrayRef<int> Mask = Shuf->getShuffleMask();
1535 unsigned NumElts = Mask.size();
1536 SmallVector<Constant *, 16> NewShufElts(NumElts);
1537 SmallVector<int, 16> NewMaskElts(NumElts);
1538 for (unsigned I = 0; I != NumElts; ++I) {
1539 if (I == InsEltIndex) {
1540 NewShufElts[I] = InsEltScalar;
1541 NewMaskElts[I] = InsEltIndex + NumElts;
1542 } else {
1543 // Copy over the existing values.
1544 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1545 NewMaskElts[I] = Mask[I];
1546 }
1547
1548 // Bail if we failed to find an element.
1549 if (!NewShufElts[I])
1550 return nullptr;
1551 }
1552
1553 // Create new operands for a shuffle that includes the constant of the
1554 // original insertelt. The old shuffle will be dead now.
1555 return new ShuffleVectorInst(Shuf->getOperand(0),
1556 ConstantVector::get(NewShufElts), NewMaskElts);
1557 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1558 // Transform sequences of insertelement ops with constant data/indexes into
1559 // a single shuffle op.
1560 // Can not handle scalable type, the number of elements needed to create
1561 // shuffle mask is not a compile-time constant.
1562 if (isa<ScalableVectorType>(InsElt.getType()))
1563 return nullptr;
1564 unsigned NumElts =
1565 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1566
1567 uint64_t InsertIdx[2];
1568 Constant *Val[2];
1569 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1570 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1571 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1572 !match(IEI->getOperand(1), m_Constant(Val[1])))
1573 return nullptr;
1574 SmallVector<Constant *, 16> Values(NumElts);
1575 SmallVector<int, 16> Mask(NumElts);
1576 auto ValI = std::begin(Val);
1577 // Generate new constant vector and mask.
1578 // We have 2 values/masks from the insertelements instructions. Insert them
1579 // into new value/mask vectors.
1580 for (uint64_t I : InsertIdx) {
1581 if (!Values[I]) {
1582 Values[I] = *ValI;
1583 Mask[I] = NumElts + I;
1584 }
1585 ++ValI;
1586 }
1587 // Remaining values are filled with 'poison' values.
1588 for (unsigned I = 0; I < NumElts; ++I) {
1589 if (!Values[I]) {
1590 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1591 Mask[I] = I;
1592 }
1593 }
1594 // Create new operands for a shuffle that includes the constant of the
1595 // original insertelt.
1596 return new ShuffleVectorInst(IEI->getOperand(0),
1597 ConstantVector::get(Values), Mask);
1598 }
1599 return nullptr;
1600}
1601
1602/// If both the base vector and the inserted element are extended from the same
1603/// type, do the insert element in the narrow source type followed by extend.
1604/// TODO: This can be extended to include other cast opcodes, but particularly
1605/// if we create a wider insertelement, make sure codegen is not harmed.
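// Illustrative example (types chosen for exposition, not from the original
// source):
//   %v = fpext <4 x half> %X to <4 x float>
//   %s = fpext half %y to float
//   %r = insertelement <4 x float> %v, float %s, i64 1
// -->
//   %n = insertelement <4 x half> %X, half %y, i64 1
//   %r = fpext <4 x half> %n to <4 x float>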
1606static Instruction *narrowInsElt(InsertElementInst &InsElt,
1607 InstCombiner::BuilderTy &Builder) {
1608 // We are creating a vector extend. If the original vector extend has another
1609 // use, that would mean we end up with 2 vector extends, so avoid that.
1610 // TODO: We could ease the use-clause to "if at least one op has one use"
1611 // (assuming that the source types match - see next TODO comment).
1612 Value *Vec = InsElt.getOperand(0);
1613 if (!Vec->hasOneUse())
1614 return nullptr;
1615
1616 Value *Scalar = InsElt.getOperand(1);
1617 Value *X, *Y;
1618 CastInst::CastOps CastOpcode;
1619 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1620 CastOpcode = Instruction::FPExt;
1621 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1622 CastOpcode = Instruction::SExt;
1623 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1624 CastOpcode = Instruction::ZExt;
1625 else
1626 return nullptr;
1627
1628 // TODO: We can allow mismatched types by creating an intermediate cast.
1629 if (X->getType()->getScalarType() != Y->getType())
1630 return nullptr;
1631
1632 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1633 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1634 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1635}
1636
1637/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1638/// try to convert to a single insert with appropriate bitcasts.
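// Illustrative little-endian example (types chosen for exposition, not from
// the original source):
//   %lo = trunc i32 %x to i16
//   %sh = lshr i32 %x, 16
//   %hi = trunc i32 %sh to i16
//   %v0 = insertelement <8 x i16> undef, i16 %lo, i64 4
//   %v1 = insertelement <8 x i16> %v0, i16 %hi, i64 5
// -->
//   %bc = bitcast <8 x i16> undef to <4 x i32>
//   %in = insertelement <4 x i32> %bc, i32 %x, i64 2
//   %v1 = bitcast <4 x i32> %in to <8 x i16>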
1639static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1640 bool IsBigEndian,
1641 InstCombiner::BuilderTy &Builder) {
1642 Value *VecOp = InsElt.getOperand(0);
1643 Value *ScalarOp = InsElt.getOperand(1);
1644 Value *IndexOp = InsElt.getOperand(2);
1645
1646 // Pattern depends on endian because we expect lower index is inserted first.
1647 // Big endian:
1648 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1649 // Little endian:
1650 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1651 // Note: It is not safe to do this transform with an arbitrary base vector
1652 // because the bitcast of that vector to fewer/larger elements could
1653 // allow poison to spill into an element that was not poison before.
1654 // TODO: Detect smaller fractions of the scalar.
1655 // TODO: One-use checks are conservative.
1656 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1657 Value *Scalar0, *BaseVec;
1658 uint64_t Index0, Index1;
1659 if (!VTy || (VTy->getNumElements() & 1) ||
1660 !match(IndexOp, m_ConstantInt(Index1)) ||
1661 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1662 m_ConstantInt(Index0))) ||
1663 !match(BaseVec, m_Undef()))
1664 return nullptr;
1665
1666 // The first insert must be to the index one less than this one, and
1667 // the first insert must be to an even index.
1668 if (Index0 + 1 != Index1 || Index0 & 1)
1669 return nullptr;
1670
1671 // For big endian, the high half of the value should be inserted first.
1672 // For little endian, the low half of the value should be inserted first.
1673 Value *X;
1674 uint64_t ShAmt;
1675 if (IsBigEndian) {
1676 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1677 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1678 return nullptr;
1679 } else {
1680 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1681 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1682 return nullptr;
1683 }
1684
1685 Type *SrcTy = X->getType();
1686 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1687 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1688 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1689 return nullptr;
1690
1691 // Bitcast the base vector to a vector type with the source element type.
1692 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1693 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1694
1695 // Scale the insert index for a vector with half as many elements.
1696 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1697 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1698 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1699 return new BitCastInst(NewInsert, VTy);
1700}
1701
1702Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1703 Value *VecOp = IE.getOperand(0);
1704 Value *ScalarOp = IE.getOperand(1);
1705 Value *IdxOp = IE.getOperand(2);
1706
1707 if (auto *V = simplifyInsertElementInst(
1708 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1709 return replaceInstUsesWith(IE, V);
1710
1711 // Canonicalize type of constant indices to i64 to simplify CSE
1712 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1713 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1714 return replaceOperand(IE, 2, NewIdx);
1715
1716 Value *BaseVec, *OtherScalar;
1717 uint64_t OtherIndexVal;
1718 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1719 m_Value(OtherScalar),
1720 m_ConstantInt(OtherIndexVal)))) &&
1721 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1722 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1723 return InsertElementInst::Create(NewIns, OtherScalar,
1724 Builder.getInt64(OtherIndexVal));
1725 }
1726 }
1727
1728 // If the scalar is bitcast and inserted into undef, do the insert in the
1729 // source type followed by bitcast.
1730 // TODO: Generalize for insert into any constant, not just undef?
1731 Value *ScalarSrc;
1732 if (match(VecOp, m_Undef()) &&
1733 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1734 (ScalarSrc->getType()->isIntegerTy() ||
1735 ScalarSrc->getType()->isFloatingPointTy())) {
1736 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1737 // bitcast (inselt undef, ScalarSrc, IdxOp)
1738 Type *ScalarTy = ScalarSrc->getType();
1739 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1740 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1741 : UndefValue::get(VecTy);
1742 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1743 return new BitCastInst(NewInsElt, IE.getType());
1744 }
1745
1746 // If the vector and scalar are both bitcast from the same element type, do
1747 // the insert in that source type followed by bitcast.
1748 Value *VecSrc;
1749 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1750 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1751 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1752 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1753 cast<VectorType>(VecSrc->getType())->getElementType() ==
1754 ScalarSrc->getType()) {
1755 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1756 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1757 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1758 return new BitCastInst(NewInsElt, IE.getType());
1759 }
1760
1761 // If the inserted element was extracted from some other fixed-length vector
1762 // and both indexes are valid constants, try to turn this into a shuffle.
1763 // Can not handle scalable vector type, the number of elements needed to
1764 // create shuffle mask is not a compile-time constant.
1765 uint64_t InsertedIdx, ExtractedIdx;
1766 Value *ExtVecOp;
1767 if (isa<FixedVectorType>(IE.getType()) &&
1768 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1769 match(ScalarOp,
1770 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1771 isa<FixedVectorType>(ExtVecOp->getType()) &&
1772 ExtractedIdx <
1773 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1774 // TODO: Looking at the user(s) to determine if this insert is a
1775 // fold-to-shuffle opportunity does not match the usual instcombine
1776 // constraints. We should decide if the transform is worthy based only
1777 // on this instruction and its operands, but that may not work currently.
1778 //
1779 // Here, we are trying to avoid creating shuffles before reaching
1780 // the end of a chain of extract-insert pairs. This is complicated because
1781 // we do not generally form arbitrary shuffle masks in instcombine
1782 // (because those may codegen poorly), but collectShuffleElements() does
1783 // exactly that.
1784 //
1785 // The rules for determining what is an acceptable target-independent
1786 // shuffle mask are fuzzy because they evolve based on the backend's
1787 // capabilities and real-world impact.
1788 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1789 if (!Insert.hasOneUse())
1790 return true;
1791 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1792 if (!InsertUser)
1793 return true;
1794 return false;
1795 };
1796
1797 // Try to form a shuffle from a chain of extract-insert ops.
1798 if (isShuffleRootCandidate(IE)) {
1799 bool Rerun = true;
1800 while (Rerun) {
1801 Rerun = false;
1802
1803 SmallVector<int, 16> Mask;
1804 ShuffleOps LR =
1805 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1806
1807 // The proposed shuffle may be trivial, in which case we shouldn't
1808 // perform the combine.
1809 if (LR.first != &IE && LR.second != &IE) {
1810 // We now have a shuffle of LHS, RHS, Mask.
1811 if (LR.second == nullptr)
1812 LR.second = PoisonValue::get(LR.first->getType());
1813 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1814 }
1815 }
1816 }
1817 }
1818
1819 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1820 unsigned VWidth = VecTy->getNumElements();
1821 APInt PoisonElts(VWidth, 0);
1822 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1823 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1824 PoisonElts)) {
1825 if (V != &IE)
1826 return replaceInstUsesWith(IE, V);
1827 return &IE;
1828 }
1829 }
1830
1831 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1832 return Shuf;
1833
1834 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1835 return NewInsElt;
1836
1837 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1838 return Broadcast;
1839
1840 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1841 return Splat;
1842
1843 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1844 return IdentityShuf;
1845
1846 if (Instruction *Ext = narrowInsElt(IE, Builder))
1847 return Ext;
1848
1849 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1850 return Ext;
1851
1852 return nullptr;
1853}
1854
1855/// Return true if we can evaluate the specified expression tree if the vector
1856/// elements were shuffled in a different order.
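// Illustrative example (values chosen for exposition, not from the original
// source): for
//   %a = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
//   %s = shufflevector <4 x i32> %a, <4 x i32> poison,
//                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// the add can be performed directly on reordered operands:
//   %t = shufflevector <4 x i32> %x, <4 x i32> poison,
//                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %s = add <4 x i32> %t, <i32 4, i32 3, i32 2, i32 1>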
1857static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1858 unsigned Depth = 5) {
1859 // We can always reorder the elements of a constant.
1860 if (isa<Constant>(V))
1861 return true;
1862
1863 // We won't reorder vector arguments. No IPO here.
1864 Instruction *I = dyn_cast<Instruction>(V);
1865 if (!I) return false;
1866
1867 // Two users may expect different orders of the elements. Don't try it.
1868 if (!I->hasOneUse())
1869 return false;
1870
1871 if (Depth == 0) return false;
1872
1873 switch (I->getOpcode()) {
1874 case Instruction::UDiv:
1875 case Instruction::SDiv:
1876 case Instruction::URem:
1877 case Instruction::SRem:
1878 // Propagating an undefined shuffle mask element to integer div/rem is not
1879 // allowed because those opcodes can create immediate undefined behavior
1880 // from an undefined element in an operand.
1881 if (llvm::is_contained(Mask, -1))
1882 return false;
1883 [[fallthrough]];
1884 case Instruction::Add:
1885 case Instruction::FAdd:
1886 case Instruction::Sub:
1887 case Instruction::FSub:
1888 case Instruction::Mul:
1889 case Instruction::FMul:
1890 case Instruction::FDiv:
1891 case Instruction::FRem:
1892 case Instruction::Shl:
1893 case Instruction::LShr:
1894 case Instruction::AShr:
1895 case Instruction::And:
1896 case Instruction::Or:
1897 case Instruction::Xor:
1898 case Instruction::ICmp:
1899 case Instruction::FCmp:
1900 case Instruction::Trunc:
1901 case Instruction::ZExt:
1902 case Instruction::SExt:
1903 case Instruction::FPToUI:
1904 case Instruction::FPToSI:
1905 case Instruction::UIToFP:
1906 case Instruction::SIToFP:
1907 case Instruction::FPTrunc:
1908 case Instruction::FPExt:
1909 case Instruction::GetElementPtr: {
1910 // Bail out if we would create longer vector ops. We could allow creating
1911 // longer vector ops, but that may result in more expensive codegen.
1912 Type *ITy = I->getType();
1913 if (ITy->isVectorTy() &&
1914 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1915 return false;
1916 for (Value *Operand : I->operands()) {
1917 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1918 return false;
1919 }
1920 return true;
1921 }
1922 case Instruction::InsertElement: {
1923 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1924 if (!CI) return false;
1925 int ElementNumber = CI->getLimitedValue();
1926
1927 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1928 // can't put an element into multiple indices.
1929 bool SeenOnce = false;
1930 for (int I : Mask) {
1931 if (I == ElementNumber) {
1932 if (SeenOnce)
1933 return false;
1934 SeenOnce = true;
1935 }
1936 }
1937 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1938 }
1939 }
1940 return false;
1941}
1942
1943/// Rebuild a new instruction just like 'I' but with the new operands given.
1944/// In the event of a type mismatch, the new operands' types are taken as
1944/// correct and the result type is recomputed from them.
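// Illustrative example (types chosen for exposition, not from the original
// source): rebuilding
//   %z = zext <8 x i8> %v to <8 x i32>
// with a 4-element replacement operand %v4 yields
//   %z4 = zext <4 x i8> %v4 to <4 x i32>
// i.e. the destination type is re-derived from the new operand's length.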
1945static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1946 IRBuilderBase &Builder) {
1947 Builder.SetInsertPoint(I);
1948 switch (I->getOpcode()) {
1949 case Instruction::Add:
1950 case Instruction::FAdd:
1951 case Instruction::Sub:
1952 case Instruction::FSub:
1953 case Instruction::Mul:
1954 case Instruction::FMul:
1955 case Instruction::UDiv:
1956 case Instruction::SDiv:
1957 case Instruction::FDiv:
1958 case Instruction::URem:
1959 case Instruction::SRem:
1960 case Instruction::FRem:
1961 case Instruction::Shl:
1962 case Instruction::LShr:
1963 case Instruction::AShr:
1964 case Instruction::And:
1965 case Instruction::Or:
1966 case Instruction::Xor: {
1967 BinaryOperator *BO = cast<BinaryOperator>(I);
1968 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1969 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1970 NewOps[0], NewOps[1]);
1971 if (auto *NewI = dyn_cast<Instruction>(New)) {
1972 if (isa<OverflowingBinaryOperator>(BO)) {
1973 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1974 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1975 }
1976 if (isa<PossiblyExactOperator>(BO)) {
1977 NewI->setIsExact(BO->isExact());
1978 }
1979 if (isa<FPMathOperator>(BO))
1980 NewI->copyFastMathFlags(I);
1981 }
1982 return New;
1983 }
1984 case Instruction::ICmp:
1985 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1986 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1987 NewOps[1]);
1988 case Instruction::FCmp:
1989 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1990 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1991 NewOps[1]);
1992 case Instruction::Trunc:
1993 case Instruction::ZExt:
1994 case Instruction::SExt:
1995 case Instruction::FPToUI:
1996 case Instruction::FPToSI:
1997 case Instruction::UIToFP:
1998 case Instruction::SIToFP:
1999 case Instruction::FPTrunc:
2000 case Instruction::FPExt: {
2001 // It's possible that the mask has a different number of elements from
2002 // the original cast. We recompute the destination type to match the mask.
2003 Type *DestTy = VectorType::get(
2004 I->getType()->getScalarType(),
2005 cast<VectorType>(NewOps[0]->getType())->getElementCount());
2006 assert(NewOps.size() == 1 && "cast with #ops != 1");
2007 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
2008 DestTy);
2009 }
2010 case Instruction::GetElementPtr: {
2011 Value *Ptr = NewOps[0];
2012 ArrayRef<Value*> Idx = NewOps.slice(1);
2013 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
2014 Ptr, Idx, "",
2015 cast<GEPOperator>(I)->getNoWrapFlags());
2016 }
2017 }
2018 llvm_unreachable("failed to rebuild vector instructions");
2019}
2020
2022 IRBuilderBase &Builder) {
2023 // Mask.size() does not need to be equal to the number of vector elements.
2024
2025 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2026 Type *EltTy = V->getType()->getScalarType();
2027
2028 if (isa<PoisonValue>(V))
2029 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
2030
2031 if (match(V, m_Undef()))
2032 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
2033
2034 if (isa<ConstantAggregateZero>(V))
2035 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
2036
2037 if (Constant *C = dyn_cast<Constant>(V))
2038 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
2039 Mask);
2040
2041 Instruction *I = cast<Instruction>(V);
2042 switch (I->getOpcode()) {
2043 case Instruction::Add:
2044 case Instruction::FAdd:
2045 case Instruction::Sub:
2046 case Instruction::FSub:
2047 case Instruction::Mul:
2048 case Instruction::FMul:
2049 case Instruction::UDiv:
2050 case Instruction::SDiv:
2051 case Instruction::FDiv:
2052 case Instruction::URem:
2053 case Instruction::SRem:
2054 case Instruction::FRem:
2055 case Instruction::Shl:
2056 case Instruction::LShr:
2057 case Instruction::AShr:
2058 case Instruction::And:
2059 case Instruction::Or:
2060 case Instruction::Xor:
2061 case Instruction::ICmp:
2062 case Instruction::FCmp:
2063 case Instruction::Trunc:
2064 case Instruction::ZExt:
2065 case Instruction::SExt:
2066 case Instruction::FPToUI:
2067 case Instruction::FPToSI:
2068 case Instruction::UIToFP:
2069 case Instruction::SIToFP:
2070 case Instruction::FPTrunc:
2071 case Instruction::FPExt:
2072 case Instruction::Select:
2073 case Instruction::GetElementPtr: {
2074 SmallVector<Value*, 8> NewOps;
2075 bool NeedsRebuild =
2076 (Mask.size() !=
2077 cast<FixedVectorType>(I->getType())->getNumElements());
2078 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2079 Value *V;
2080 // Recursively call evaluateInDifferentElementOrder on vector arguments
2081 // as well. E.g. GetElementPtr may have scalar operands even if the
2082 // return value is a vector, so we need to examine the operand type.
2083 if (I->getOperand(i)->getType()->isVectorTy())
2084 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
2085 else
2086 V = I->getOperand(i);
2087 NewOps.push_back(V);
2088 NeedsRebuild |= (V != I->getOperand(i));
2089 }
2090 if (NeedsRebuild)
2091 return buildNew(I, NewOps, Builder);
2092 return I;
2093 }
2094 case Instruction::InsertElement: {
2095 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2096
2097 // The insertelement was inserting at Element. Figure out which element
2098 // that becomes after shuffling. The answer is guaranteed to be unique
2099 // by CanEvaluateShuffled.
2100 bool Found = false;
2101 int Index = 0;
2102 for (int e = Mask.size(); Index != e; ++Index) {
2103 if (Mask[Index] == Element) {
2104 Found = true;
2105 break;
2106 }
2107 }
2108
2109 // If element is not in Mask, no need to handle the operand 1 (element to
2110 // be inserted). Just evaluate values in operand 0 according to Mask.
2111 if (!Found)
2112 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2113
2114 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2115 Builder);
2116 Builder.SetInsertPoint(I);
2117 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2118 }
2119 }
2120 llvm_unreachable("failed to reorder elements of vector instruction!");
2121}
2122
2123// Returns true if the shuffle is extracting a contiguous range of values from
2124// LHS, for example:
2125// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2126// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2127// Shuffles to: |EE|FF|GG|HH|
2128// +--+--+--+--+
2129static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2130 ArrayRef<int> Mask) {
2131 unsigned LHSElems =
2132 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2133 unsigned MaskElems = Mask.size();
2134 unsigned BegIdx = Mask.front();
2135 unsigned EndIdx = Mask.back();
2136 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2137 return false;
2138 for (unsigned I = 0; I != MaskElems; ++I)
2139 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2140 return false;
2141 return true;
2142}
2143
2144/// These are the ingredients in an alternate form binary operator as described
2145/// below.
2146struct BinopElts {
2147 BinaryOperator::BinaryOps Opcode;
2148 Value *Op0;
2149 Value *Op1;
2150 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2151 Value *V0 = nullptr, Value *V1 = nullptr) :
2152 Opcode(Opc), Op0(V0), Op1(V1) {}
2153 operator bool() const { return Opcode != 0; }
2154};
2155
2156/// Binops may be transformed into binops with different opcodes and operands.
2157/// Reverse the usual canonicalization to enable folds with the non-canonical
2158/// form of the binop. If a transform is possible, return the elements of the
2159/// new binop. If not, return invalid elements.
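// Illustrative examples (constants chosen for exposition, not from the
// original source):
//   shl X, <i32 1, i32 3>   is treated as   mul X, <i32 2, i32 8>
//   or disjoint X, Y        is treated as   add X, Y
//   sub 0, X                is treated as   mul X, -1 (splat of -1 for vectors)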
2160static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2161 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2162 Type *Ty = BO->getType();
2163 switch (BO->getOpcode()) {
2164 case Instruction::Shl: {
2165 // shl X, C --> mul X, (1 << C)
2166 Constant *C;
2167 if (match(BO1, m_ImmConstant(C))) {
2168 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2169 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2170 assert(ShlOne && "Constant folding of immediate constants failed");
2171 return {Instruction::Mul, BO0, ShlOne};
2172 }
2173 break;
2174 }
2175 case Instruction::Or: {
2176 // or disjoint X, C --> add X, C
2177 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2178 return {Instruction::Add, BO0, BO1};
2179 break;
2180 }
2181 case Instruction::Sub:
2182 // sub 0, X --> mul X, -1
2183 if (match(BO0, m_ZeroInt()))
2184 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2185 break;
2186 default:
2187 break;
2188 }
2189 return {};
2190}
2191
2192/// A select shuffle of a select shuffle with a shared operand can be reduced
2193/// to a single select shuffle. This is an obvious improvement in IR, and the
2194/// backend is expected to lower select shuffles efficiently.
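// Illustrative example (masks chosen for exposition, not from the original
// source):
//   %s1 = shufflevector <4 x i32> %X, <4 x i32> %Y,
//                       <4 x i32> <i32 0, i32 5, i32 6, i32 3>
//   %s2 = shufflevector <4 x i32> %X, <4 x i32> %s1,
//                       <4 x i32> <i32 0, i32 5, i32 6, i32 7>
// -->
//   %s2 = shufflevector <4 x i32> %X, <4 x i32> %Y,
//                       <4 x i32> <i32 0, i32 5, i32 6, i32 3>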
2195static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2196 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2197
2198 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2199 SmallVector<int, 16> Mask;
2200 Shuf.getShuffleMask(Mask);
2201 unsigned NumElts = Mask.size();
2202
2203 // Canonicalize a select shuffle with common operand as Op1.
2204 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2205 if (ShufOp && ShufOp->isSelect() &&
2206 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2207 std::swap(Op0, Op1);
2208 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2209 }
2210
2211 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2212 if (!ShufOp || !ShufOp->isSelect() ||
2213 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2214 return nullptr;
2215
2216 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2217 SmallVector<int, 16> Mask1;
2218 ShufOp->getShuffleMask(Mask1);
2219 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2220
2221 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2222 if (Y == Op0) {
2223 std::swap(X, Y);
2224 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2225 }
2226
2227 // If the mask chooses from X (operand 0), it stays the same.
2228 // If the mask chooses from the earlier shuffle, the other mask value is
2229 // transferred to the combined select shuffle:
2230 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2231 SmallVector<int, 16> NewMask(NumElts);
2232 for (unsigned i = 0; i != NumElts; ++i)
2233 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2234
2235 // A select mask with undef elements might look like an identity mask.
2236 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2237 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2238 "Unexpected shuffle mask");
2239 return new ShuffleVectorInst(X, Y, NewMask);
2240}
2241
2242static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2243 const SimplifyQuery &SQ) {
2244 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2245
2246 // Are we shuffling together some value and that same value after it has been
2247 // modified by a binop with a constant?
2248 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2249 Constant *C;
2250 bool Op0IsBinop;
2251 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2252 Op0IsBinop = true;
2253 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2254 Op0IsBinop = false;
2255 else
2256 return nullptr;
2257
2258 // The identity constant for a binop leaves a variable operand unchanged. For
2259 // a vector, this is a splat of something like 0, -1, or 1.
2260 // If there's no identity constant for this binop, we're done.
2261 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2262 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2263 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2264 if (!IdC)
2265 return nullptr;
2266
2267 Value *X = Op0IsBinop ? Op1 : Op0;
2268
2269 // Prevent folding in the case the non-binop operand might have NaN values.
2270 // If X can have NaN elements then we have that the floating point math
2271 // operation in the transformed code may not preserve the exact NaN
2272 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2273 // This makes the transformation incorrect since the original program would
2274 // have preserved the exact NaN bit-pattern.
2275 // Avoid the folding if X can have NaN elements.
2276 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2277 !isKnownNeverNaN(X, SQ))
2278 return nullptr;
2279
2280 // Shuffle identity constants into the lanes that return the original value.
2281 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2282 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2283 // The existing binop constant vector remains in the same operand position.
2284 ArrayRef<int> Mask = Shuf.getShuffleMask();
2285 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2286 ConstantExpr::getShuffleVector(IdC, C, Mask);
2287
2288 bool MightCreatePoisonOrUB =
2289 is_contained(Mask, PoisonMaskElem) &&
2290 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2291 if (MightCreatePoisonOrUB)
2292 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2293
2294 // shuf (bop X, C), X, M --> bop X, C'
2295 // shuf X, (bop X, C), M --> bop X, C'
2296 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2297 NewBO->copyIRFlags(BO);
2298
2299 // An undef shuffle mask element may propagate as an undef constant element in
2300 // the new binop. That would produce poison where the original code might not.
2301 // If we already made a safe constant, then there's no danger.
2302 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2303 NewBO->dropPoisonGeneratingFlags();
2304 return NewBO;
2305}
2306
2307/// If we have an insert of a scalar to a non-zero element of an undefined
2308/// vector and then shuffle that value, that's the same as inserting to the zero
2309/// element and shuffling. Splatting from the zero element is recognized as the
2310/// canonical form of splat.
2311static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2312 InstCombiner::BuilderTy &Builder) {
2313 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2314 ArrayRef<int> Mask = Shuf.getShuffleMask();
2315 Value *X;
2316 uint64_t IndexC;
2317
2318 // Match a shuffle that is a splat to a non-zero element.
2319 if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
2320 m_ConstantInt(IndexC)))) ||
2321 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2322 return nullptr;
2323
2324 // Insert into element 0 of a poison vector.
2325 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2326 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2327
2328 // Splat from element 0. Any mask element that is poison remains poison.
2329 // For example:
2330 // shuf (inselt poison, X, 2), _, <2,2,undef>
2331 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2332 unsigned NumMaskElts =
2333 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2334 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2335 for (unsigned i = 0; i != NumMaskElts; ++i)
2336 if (Mask[i] == PoisonMaskElem)
2337 NewMask[i] = Mask[i];
2338
2339 return new ShuffleVectorInst(NewIns, NewMask);
2340}
2341
2342/// Try to fold shuffles that are the equivalent of a vector select.
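// Illustrative example (mask chosen for exposition, not from the original
// source): the shuffle
//   %r = shufflevector <4 x i32> %X, <4 x i32> %Y,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// takes each lane from the matching lane of %X or %Y, so it behaves like
//   %r = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
//               <4 x i32> %X, <4 x i32> %Y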
2343Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2344 if (!Shuf.isSelect())
2345 return nullptr;
2346
2347 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2348 // Commuting undef to operand 0 conflicts with another canonicalization.
2349 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2350 if (!match(Shuf.getOperand(1), m_Undef()) &&
2351 Shuf.getMaskValue(0) >= (int)NumElts) {
2352 // TODO: Can we assert that both operands of a shuffle-select are not undef
2353 // (otherwise, it would have been folded by instsimplify)?
2354 Shuf.commute();
2355 return &Shuf;
2356 }
2357
2358 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2359 return I;
2360
2361 if (Instruction *I = foldSelectShuffleWith1Binop(
2362 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2363 return I;
2364
2365 BinaryOperator *B0, *B1;
2366 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2367 !match(Shuf.getOperand(1), m_BinOp(B1)))
2368 return nullptr;
2369
2370 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2371 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2372 // with a multiply, we will exit because C0/C1 will not be set.
2373 Value *X, *Y;
2374 Constant *C0 = nullptr, *C1 = nullptr;
2375 bool ConstantsAreOp1;
2376 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2377 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2378 ConstantsAreOp1 = false;
2379 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2380 m_Neg(m_Value(X)))) &&
2381 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2382 m_Neg(m_Value(Y)))))
2383 ConstantsAreOp1 = true;
2384 else
2385 return nullptr;
2386
2387 // We need matching binops to fold the lanes together.
2388 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2389 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2390 bool DropNSW = false;
2391 if (ConstantsAreOp1 && Opc0 != Opc1) {
2392 // TODO: We drop "nsw" if shift is converted into multiply because it may
2393 // not be correct when the shift amount is BitWidth - 1. We could examine
2394 // each vector element to determine if it is safe to keep that flag.
2395 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2396 DropNSW = true;
2397 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2398 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2399 Opc0 = AltB0.Opcode;
2400 C0 = cast<Constant>(AltB0.Op1);
2401 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2402 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2403 Opc1 = AltB1.Opcode;
2404 C1 = cast<Constant>(AltB1.Op1);
2405 }
2406 }
2407
2408 if (Opc0 != Opc1 || !C0 || !C1)
2409 return nullptr;
2410
2411 // The opcodes must be the same. Use a new name to make that clear.
2412 BinaryOperator::BinaryOps BOpc = Opc0;
2413
2414 // Select the constant elements needed for the single binop.
2415 ArrayRef<int> Mask = Shuf.getShuffleMask();
2416 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2417
2418 // We are moving a binop after a shuffle. When a shuffle has an undefined
2419 // mask element, the result is undefined, but it is not poison or undefined
2420 // behavior. That is not necessarily true for div/rem/shift.
2421 bool MightCreatePoisonOrUB =
2422 is_contained(Mask, PoisonMaskElem) &&
2423 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2424 if (MightCreatePoisonOrUB)
2425 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2426 ConstantsAreOp1);
2427
2428 Value *V;
2429 if (X == Y) {
2430 // Remove a binop and the shuffle by rearranging the constant:
2431 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2432 // shuffle (op C0, V), (op C1, V), M --> op C', V
2433 V = X;
2434 } else {
2435 // If there are 2 different variable operands, we must create a new shuffle
2436 // (select) first, so check uses to ensure that we don't end up with more
2437 // instructions than we started with.
2438 if (!B0->hasOneUse() && !B1->hasOneUse())
2439 return nullptr;
2440
2441 // If we use the original shuffle mask and op1 is *variable*, we would be
2442 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2443 // poison. We do not have to guard against UB when *constants* are op1
2444 // because safe constants guarantee that we do not overflow sdiv/srem (and
2445 // there's no danger for other opcodes).
2446 // TODO: To allow this case, create a new shuffle mask with no undefs.
2447 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2448 return nullptr;
2449
2450 // Note: In general, we do not create new shuffles in InstCombine because we
2451 // do not know if a target can lower an arbitrary shuffle optimally. In this
2452 // case, the shuffle uses the existing mask, so there is no additional risk.
2453
2454 // Select the variable vectors first, then perform the binop:
2455 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2456 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2457 V = Builder.CreateShuffleVector(X, Y, Mask);
2458 }
2459
2460 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2461 Builder.CreateBinOp(BOpc, NewC, V);
2462
2463 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2464 // 1. If we changed an opcode, poison conditions might have changed.
2465 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2466 // where the original code did not. But if we already made a safe constant,
2467 // then there's no danger.
2468 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2469 NewI->copyIRFlags(B0);
2470 NewI->andIRFlags(B1);
2471 if (DropNSW)
2472 NewI->setHasNoSignedWrap(false);
2473 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2474 NewI->dropPoisonGeneratingFlags();
2475 }
2476 return replaceInstUsesWith(Shuf, NewBO);
2477}
2478
2479/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2480/// Example (little endian):
2481/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2482static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2483 bool IsBigEndian) {
2484 // This must be a bitcasted shuffle of 1 vector integer operand.
2485 Type *DestType = Shuf.getType();
2486 Value *X;
2487 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2488 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2489 return nullptr;
2490
2491 // The source type must have the same number of elements as the shuffle,
2492 // and the source element type must be larger than the shuffle element type.
2493 Type *SrcType = X->getType();
2494 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2495 cast<FixedVectorType>(SrcType)->getNumElements() !=
2496 cast<FixedVectorType>(DestType)->getNumElements() ||
2497 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2498 return nullptr;
2499
2500 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2501 "Expected a shuffle that decreases length");
2502
2503 // Last, check that the mask chooses the correct low bits for each narrow
2504 // element in the result.
2505 uint64_t TruncRatio =
2506 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2507 ArrayRef<int> Mask = Shuf.getShuffleMask();
2508 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2509 if (Mask[i] == PoisonMaskElem)
2510 continue;
2511 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2512 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2513 if (Mask[i] != (int)LSBIndex)
2514 return nullptr;
2515 }
2516
2517 return new TruncInst(X, DestType);
2518}
2519
2520/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2521/// narrowing (concatenating with poison and extracting back to the original
2522/// length). This allows replacing the wide select with a narrow select.
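// Illustrative example (types chosen for exposition, not from the original
// source):
//   %wc = shufflevector <2 x i1> %cond, <2 x i1> poison,
//                       <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %ws = select <4 x i1> %wc, <4 x i32> %X, <4 x i32> %Y
//   %r  = shufflevector <4 x i32> %ws, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// -->
//   %nx = shufflevector <4 x i32> %X, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
//   %ny = shufflevector <4 x i32> %Y, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
//   %r  = select <2 x i1> %cond, <2 x i32> %nx, <2 x i32> %ny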
2523static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2524 InstCombiner::BuilderTy &Builder) {
2525 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2526 // of the 1st vector operand of a shuffle.
2527 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2528 return nullptr;
2529
2530 // The vector being shuffled must be a vector select that we can eliminate.
2531 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2532 Value *Cond, *X, *Y;
2533 if (!match(Shuf.getOperand(0),
2534 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2535 return nullptr;
2536
2537 // We need a narrow condition value. It must be extended with poison elements
2538 // and have the same number of elements as this shuffle.
2539 unsigned NarrowNumElts =
2540 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2541 Value *NarrowCond;
2542 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2543 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2544 NarrowNumElts ||
2545 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2546 return nullptr;
2547
2548 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2549 // -->
2550 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2551 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2552 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2553 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2554}
2555
2556/// Canonicalize FP negate/abs after shuffle.
2557static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2558 InstCombiner::BuilderTy &Builder) {
2559 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2560 Value *X;
2561 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2562 return nullptr;
2563
2564 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2565
2566 // Match 2-input (binary) shuffle.
2567 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2568 Value *Y;
2569 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2570 S0->getOpcode() != S1->getOpcode() ||
2571 (!S0->hasOneUse() && !S1->hasOneUse()))
2572 return nullptr;
2573
2574 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2575 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2576 Instruction *NewF;
2577 if (IsFNeg) {
2578 NewF = UnaryOperator::CreateFNeg(NewShuf);
2579 } else {
2580 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2581 Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2582 NewF = CallInst::Create(FAbs, {NewShuf});
2583 }
2584 NewF->copyIRFlags(S0);
2585 NewF->andIRFlags(S1);
2586 return NewF;
2587}
2588
2589/// Canonicalize casts after shuffle.
2590static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2591 InstCombiner::BuilderTy &Builder) {
2592 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2593 if (!Cast0)
2594 return nullptr;
2595
2596 // TODO: Allow other opcodes? That would require easing the type restrictions
2597 // below here.
2598 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2599 switch (CastOpcode) {
2600 case Instruction::SExt:
2601 case Instruction::ZExt:
2602 case Instruction::FPToSI:
2603 case Instruction::FPToUI:
2604 case Instruction::SIToFP:
2605 case Instruction::UIToFP:
2606 break;
2607 default:
2608 return nullptr;
2609 }
2610
2611 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2612 VectorType *ShufTy = Shuf.getType();
2613 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2614
2615 // TODO: Allow length-increasing shuffles?
2616 if (ShufTy->getElementCount().getKnownMinValue() >
2617 ShufOpTy->getElementCount().getKnownMinValue())
2618 return nullptr;
2619
2620 // shuffle (cast X), Poison, identity-with-extract-mask -->
2621 // cast (shuffle X, Poison, identity-with-extract-mask).
2622 if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() &&
2623 Shuf.isIdentityWithExtract()) {
2624 auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2625 PoisonValue::get(CastSrcTy),
2626 Shuf.getShuffleMask());
2627 return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2628 }
2629
2630 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2631 // Do we have 2 matching cast operands?
2632 if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2633 Cast0->getSrcTy() != Cast1->getSrcTy())
2634 return nullptr;
2635
2636 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2637 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2638 "Expected fixed vector operands for casts and binary shuffle");
2639 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2640 return nullptr;
2641
2642 // At least one of the operands must have only one use (the shuffle).
2643 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2644 return nullptr;
2645
2646 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2647 Value *X = Cast0->getOperand(0);
2648 Value *Y = Cast1->getOperand(0);
2649 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2650 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2651}
2652
2653/// Try to fold an extract subvector operation.
2654static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2655 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2656 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2657 return nullptr;
2658
2659 // Check if we are extracting all bits of an inserted scalar:
2660 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2661 Value *X;
2662 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2663 X->getType()->getPrimitiveSizeInBits() ==
2664 Shuf.getType()->getPrimitiveSizeInBits())
2665 return new BitCastInst(X, Shuf.getType());
2666
2667 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2668 Value *Y;
2669 ArrayRef<int> Mask;
2670 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2671 return nullptr;
2672
2673 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2674 // then combining may result in worse codegen.
2675 if (!Op0->hasOneUse())
2676 return nullptr;
2677
2678 // We are extracting a subvector from a shuffle. Remove excess elements from
2679 // the 1st shuffle mask to eliminate the extract.
2680 //
2681 // This transform is conservatively limited to identity extracts because we do
2682 // not allow arbitrary shuffle mask creation as a target-independent transform
2683 // (because we can't guarantee that will lower efficiently).
2684 //
2685 // If the extracting shuffle has a poison mask element, it transfers to the
2686 // new shuffle mask. Otherwise, copy the original mask element. Example:
2687 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2688 // shuf X, Y, <C0, poison, C2, poison>
2689 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2690 SmallVector<int, 16> NewMask(NumElts);
2691 assert(NumElts < Mask.size() &&
2692 "Identity with extract must have less elements than its inputs");
2693
2694 for (unsigned i = 0; i != NumElts; ++i) {
2695 int ExtractMaskElt = Shuf.getMaskValue(i);
2696 int MaskElt = Mask[i];
2697 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2698 }
2699 return new ShuffleVectorInst(X, Y, NewMask);
2700}
2701
2702/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2703/// operand with the operand of an insertelement.
2704static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2705 InstCombinerImpl &IC) {
2706 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2707 SmallVector<int, 16> Mask;
2708 Shuf.getShuffleMask(Mask);
2709
2710 int NumElts = Mask.size();
2711 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2712
2713 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2714 // not be able to handle it there if the insertelement has >1 use.
2715 // If the shuffle has an insertelement operand but does not choose the
2716 // inserted scalar element from that value, then we can replace that shuffle
2717 // operand with the source vector of the insertelement.
2718 Value *X;
2719 uint64_t IdxC;
2720 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2721 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2722 if (!is_contained(Mask, (int)IdxC))
2723 return IC.replaceOperand(Shuf, 0, X);
2724 }
2725 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2726 // Offset the index constant by the vector width because we are checking for
2727 // accesses to the 2nd vector input of the shuffle.
2728 IdxC += InpNumElts;
2729 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2730 if (!is_contained(Mask, (int)IdxC))
2731 return IC.replaceOperand(Shuf, 1, X);
2732 }
2733 // For the rest of the transform, the shuffle must not change vector sizes.
2734 // TODO: This restriction could be removed if the insert has only one use
2735 // (because the transform would require a new length-changing shuffle).
2736 if (NumElts != InpNumElts)
2737 return nullptr;
2738
2739 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2740 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2741 // We need an insertelement with a constant index.
2742 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2743 m_ConstantInt(IndexC))))
2744 return false;
2745
2746 // Test the shuffle mask to see if it splices the inserted scalar into the
2747 // operand 1 vector of the shuffle.
2748 int NewInsIndex = -1;
2749 for (int i = 0; i != NumElts; ++i) {
2750 // Ignore undef mask elements.
2751 if (Mask[i] == -1)
2752 continue;
2753
2754 // The shuffle takes elements of operand 1 without lane changes.
2755 if (Mask[i] == NumElts + i)
2756 continue;
2757
2758 // The shuffle must choose the inserted scalar exactly once.
2759 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2760 return false;
2761
2762 // The shuffle is placing the inserted scalar into element i.
2763 NewInsIndex = i;
2764 }
2765
2766 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2767
2768 // Index is updated to the potentially translated insertion lane.
2769 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2770 return true;
2771 };
2772
2773 // If the shuffle is unnecessary, insert the scalar operand directly into
2774 // operand 1 of the shuffle. Example:
2775 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2776 Value *Scalar;
2777 ConstantInt *IndexC;
2778 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2779 return InsertElementInst::Create(V1, Scalar, IndexC);
2780
2781 // Try again after commuting shuffle. Example:
2782 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2783 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2784 std::swap(V0, V1);
2785 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2786 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2787 return InsertElementInst::Create(V1, Scalar, IndexC);
2788
2789 return nullptr;
2790}
2791
2792static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2793 // Match the operands as identity with padding (also known as concatenation
2794 // with undef) shuffles of the same source type. The backend is expected to
2795 // recreate these concatenations from a shuffle of narrow operands.
2796 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2797 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2798 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2799 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2800 return nullptr;
2801
2802 // We limit this transform to power-of-2 types because we expect that the
2803 // backend can convert the simplified IR patterns to identical nodes as the
2804 // original IR.
2805 // TODO: If we can verify the same behavior for arbitrary types, the
2806 // power-of-2 checks can be removed.
2807 Value *X = Shuffle0->getOperand(0);
2808 Value *Y = Shuffle1->getOperand(0);
2809 if (X->getType() != Y->getType() ||
2810 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2811 !isPowerOf2_32(
2812 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2813 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2814 match(X, m_Undef()) || match(Y, m_Undef()))
2815 return nullptr;
2816 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2817 match(Shuffle1->getOperand(1), m_Undef()) &&
2818 "Unexpected operand for identity shuffle");
2819
2820 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2821 // operands directly by adjusting the shuffle mask to account for the narrower
2822 // types:
2823 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2824 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2825 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2826 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2827
2828 ArrayRef<int> Mask = Shuf.getShuffleMask();
2829 SmallVector<int, 16> NewMask(Mask.size(), -1);
2830 for (int i = 0, e = Mask.size(); i != e; ++i) {
2831 if (Mask[i] == -1)
2832 continue;
2833
2834 // If this shuffle is choosing an undef element from 1 of the sources, that
2835 // element is undef.
2836 if (Mask[i] < WideElts) {
2837 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2838 continue;
2839 } else {
2840 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2841 continue;
2842 }
2843
2844 // If this shuffle is choosing from the 1st narrow op, the mask element is
2845 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2846 // element is offset down to adjust for the narrow vector widths.
2847 if (Mask[i] < WideElts) {
2848 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2849 NewMask[i] = Mask[i];
2850 } else {
2851 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2852 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2853 }
2854 }
2855 return new ShuffleVectorInst(X, Y, NewMask);
2856}
2857
2858// Splatting the first element of the result of a BinOp, where any of the
2859// BinOp's operands is the result of a first-element splat, can be simplified
2860// to splatting the first element of the BinOp's result.
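// Illustrative example (types chosen for exposition, not from the original
// source):
//   %sx = shufflevector <4 x i32> %X, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b  = add <4 x i32> %sx, %Y
//   %r  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// -->
//   %b  = add <4 x i32> %X, %Y
//   %r  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer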
2861Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2862 if (!match(SVI.getOperand(1), m_Poison()) ||
2863 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2864 !SVI.getOperand(0)->hasOneUse())
2865 return nullptr;
2866
2867 Value *Op0 = SVI.getOperand(0);
2868 Value *X, *Y;
2869 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2870 m_Value(Y))) &&
2871 !match(Op0, m_BinOp(m_Value(X),
2872 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2873 return nullptr;
2874 if (X->getType() != Y->getType())
2875 return nullptr;
2876
2877 auto *BinOp = cast<BinaryOperator>(Op0);
2878 if (!isSafeToSpeculativelyExecute(BinOp))
2879 return nullptr;
2880
2881 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2882 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2883 NewBOI->copyIRFlags(BinOp);
2884
2885 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2886}
2887
2888Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2889 Value *LHS = SVI.getOperand(0);
2890 Value *RHS = SVI.getOperand(1);
2891 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2892 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2893 SVI.getType(), ShufQuery))
2894 return replaceInstUsesWith(SVI, V);
2895
2896 if (Instruction *I = simplifyBinOpSplats(SVI))
2897 return I;
2898
2899 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2900 // order to support scalable vectors.
2901 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2902 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2903
2904 if (isa<ScalableVectorType>(LHS->getType()))
2905 return nullptr;
2906
2907 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2908 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2909
2910 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2911 //
2912 // if X and Y are of the same (vector) type, and the element size is not
2913 // changed by the bitcasts, we can distribute the bitcasts through the
2914 // shuffle, hopefully reducing the number of instructions. We make sure that
2915 // at least one bitcast only has one use, so we don't *increase* the number of
2916 // instructions here.
2917 Value *X, *Y;
2918 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2919 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2920 X->getType()->getScalarSizeInBits() ==
2921 SVI.getType()->getScalarSizeInBits() &&
2922 (LHS->hasOneUse() || RHS->hasOneUse())) {
2923 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2924 SVI.getName() + ".uncasted");
2925 return new BitCastInst(V, SVI.getType());
2926 }
2927
2928 ArrayRef<int> Mask = SVI.getShuffleMask();
2929
2930 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2931 // simulated shuffle can simplify, then this shuffle is unnecessary:
2932 // shuf (bitcast X), undef, Mask --> bitcast X'
2933 // TODO: This could be extended to allow length-changing shuffles.
2934 // The transform might also be obsoleted if we allowed canonicalization
2935 // of bitcasted shuffles.
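// Illustrative example (operands assumed): if %x is itself a splat, e.g.
//   %x = shufflevector <2 x i64> %v, <2 x i64> poison, <2 x i32> zeroinitializer
// then
//   shuf (bitcast <2 x i64> %x to <4 x i32>), undef, <i32 2, i32 3, i32 0, i32 1>
// scales to a <2 x i64> shuffle of %x with mask <1, 0>, which can fold to %x
// itself, so the whole pattern becomes bitcast %x to <4 x i32>.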
2936 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2937 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2938 // Try to create a scaled mask constant.
2939 auto *XType = cast<FixedVectorType>(X->getType());
2940 unsigned XNumElts = XType->getNumElements();
2941 SmallVector<int, 16> ScaledMask;
2942 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2943 // If the shuffled source vector simplifies, cast that value to this
2944 // shuffle's type.
2945 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2946 ScaledMask, XType, ShufQuery))
2947 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2948 }
2949 }
2950
2951 // shuffle x, x, mask --> shuffle x, undef, mask'
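// For example (illustrative): shuffle %x, %x, <0, 5, 2, 7> reads only copies
// of %x, so it becomes shuffle %x, poison, <0, 1, 2, 3>.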
2952 if (LHS == RHS) {
2953 assert(!match(RHS, m_Undef()) &&
2954 "Shuffle with 2 undef ops not simplified?");
2955 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2956 }
2957
2958 // shuffle undef, x, mask --> shuffle x, undef, mask'
2959 if (match(LHS, m_Undef())) {
2960 SVI.commute();
2961 return &SVI;
2962 }
2963
2964 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2965 return I;
2966
2967 if (Instruction *I = foldSelectShuffle(SVI))
2968 return I;
2969
2970 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2971 return I;
2972
2973 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2974 return I;
2975
2976 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2977 return I;
2978
2979 if (Instruction *I = foldCastShuffle(SVI, Builder))
2980 return I;
2981
2982 APInt PoisonElts(VWidth, 0);
2983 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2984 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2985 if (V != &SVI)
2986 return replaceInstUsesWith(SVI, V);
2987 return &SVI;
2988 }
2989
2990 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2991 return I;
2992
2993 // These transforms have the potential to lose undef knowledge, so they are
2994 // intentionally placed after SimplifyDemandedVectorElts().
2995 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2996 return I;
2997 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2998 return I;
2999
3000 if (match(RHS, m_Constant())) {
3001 if (auto *SI = dyn_cast<SelectInst>(LHS)) {
3002 // We cannot do this fold for elementwise select since ShuffleVector is
3003 // not elementwise.
3004 if (SI->getCondition()->getType()->isIntegerTy() &&
3005 (isa<PoisonValue>(RHS) ||
3006 isGuaranteedNotToBePoison(SI->getCondition()))) {
3007 if (Instruction *I = FoldOpIntoSelect(SVI, SI))
3008 return I;
3009 }
3010 }
3011 if (auto *PN = dyn_cast<PHINode>(LHS)) {
3012 if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
3013 return I;
3014 }
3015 }
3016
3017 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
3018 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
3019 return replaceInstUsesWith(SVI, V);
3020 }
3021
3022 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3023 // a non-vector type. We can instead bitcast the original vector followed by
3024 // an extract of the desired element:
3025 //
3026 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3027 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3028 // %1 = bitcast <4 x i8> %sroa to i32
3029 // Becomes:
3030 // %bc = bitcast <16 x i8> %in to <4 x i32>
3031 // %ext = extractelement <4 x i32> %bc, i32 0
3032 //
3033 // If the shuffle is extracting a contiguous range of values from the input
3034 // vector then each use which is a bitcast of the extracted size can be
3035 // replaced. This will work if the vector types are compatible, and the begin
3036 // index is aligned to a value in the casted vector type. If the begin index
3037 // isn't aligned then we can shuffle the original vector (keeping the same
3038 // vector type) before extracting.
3039 //
3040 // This code will bail out if the target type is fundamentally incompatible
3041 // with vectors of the source type.
3042 //
3043 // Example of <16 x i8>, target type i32:
3044 // Index range [4,8): v-----------v Will work.
3045 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3046 // <16 x i8>: | | | | | | | | | | | | | | | | |
3047 // <4 x i32>: | | | | |
3048 // +-----------+-----------+-----------+-----------+
3049 // Index range [6,10): ^-----------^ Needs an extra shuffle.
3050 // Target type i40: ^--------------^ Won't work, bail.
3051 bool MadeChange = false;
3052 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3053 Value *V = LHS;
3054 unsigned MaskElems = Mask.size();
3055 auto *SrcTy = cast<FixedVectorType>(V->getType());
3056 unsigned VecBitWidth = DL.getTypeSizeInBits(SrcTy);
3057 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3058 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3059 unsigned SrcNumElems = SrcTy->getNumElements();
3060 SmallMapVector<Type *, Value *, 4> NewBCs;
3061 SmallVector<BitCastInst *, 8> BCs;
3062 for (User *U : SVI.users())
3063 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
3064 // Only visit bitcasts that weren't previously handled.
3065 if (BC->use_empty())
3066 continue;
3067 // Prefer to combine bitcasts of bitcasts before attempting this fold.
3068 if (BC->hasOneUse()) {
3069 auto *BC2 = dyn_cast<BitCastInst>(BC->user_back());
3070 if (BC2 && isEliminableCastPair(BC, BC2))
3071 continue;
3072 }
3073 BCs.push_back(BC);
3074 }
3075 for (BitCastInst *BC : BCs) {
3076 unsigned BegIdx = Mask.front();
3077 Type *TgtTy = BC->getDestTy();
3078 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3079 if (!TgtElemBitWidth)
3080 continue;
3081 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3082 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3083 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3084 if (!VecBitWidthsEqual)
3085 continue;
3086 if (!VectorType::isValidElementType(TgtTy))
3087 continue;
3088 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3089 if (!BegIsAligned) {
3090 // Shuffle the input so [0,MaskElems) contains the output, and
3091 // [MaskElems,SrcNumElems) is undef.
3092 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3093 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3094 ShuffleMask[I] = Idx;
3095 V = Builder.CreateShuffleVector(V, ShuffleMask,
3096 SVI.getName() + ".extract");
3097 BegIdx = 0;
3098 }
3099 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3100 assert(SrcElemsPerTgtElem);
3101 BegIdx /= SrcElemsPerTgtElem;
3102 auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
3103 if (Inserted)
3104 It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3105 auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,
3106 SVI.getName() + ".extract");
3107 // The shufflevector isn't being replaced: the bitcast that used it
3108 // is. InstCombine will visit the newly-created instructions.
3109 replaceInstUsesWith(*BC, Ext);
3110 MadeChange = true;
3111 }
3112 }
3113
3114 // If the LHS is a shufflevector itself, see if we can combine it with this
3115 // one without producing an unusual shuffle.
3116 // Cases that might be simplified:
3117 // 1.
3118 // x1=shuffle(v1,v2,mask1)
3119 // x=shuffle(x1,undef,mask)
3120 // ==>
3121 // x=shuffle(v1,undef,newMask)
3122 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3123 // 2.
3124 // x1=shuffle(v1,undef,mask1)
3125 // x=shuffle(x1,x2,mask)
3126 // where v1.size() == mask1.size()
3127 // ==>
3128 // x=shuffle(v1,x2,newMask)
3129 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3130 // 3.
3131 // x2=shuffle(v2,undef,mask2)
3132 // x=shuffle(x1,x2,mask)
3133 // where v2.size() == mask2.size()
3134 // ==>
3135 // x=shuffle(x1,v2,newMask)
3136 // newMask[i] = (mask[i] < x1.size())
3137 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3138 // 4.
3139 // x1=shuffle(v1,undef,mask1)
3140 // x2=shuffle(v2,undef,mask2)
3141 // x=shuffle(x1,x2,mask)
3142 // where v1.size() == v2.size()
3143 // ==>
3144 // x=shuffle(v1,v2,newMask)
3145 // newMask[i] = (mask[i] < x1.size())
3146 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3147 //
3148 // Here we are really conservative:
3149 // we are absolutely afraid of producing a shuffle mask not in the input
3150 // program, because the code gen may not be smart enough to turn a merged
3151 // shuffle into two specific shuffles: it may produce worse code. As such,
3152 // we only merge two shuffles if the result is either a splat or one of the
3153 // input shuffle masks. In this case, merging the shuffles just removes
3154 // one instruction, which we know is safe. This is good for things like
3155 // turning: (splat(splat)) -> splat, or
3156 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
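// Illustrative splat(splat) case (values assumed):
//   %x1 = shufflevector <4 x i32> %v1, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
//   %x  = shufflevector <4 x i32> %x1, <4 x i32> poison, <4 x i32> zeroinitializer
// Every lane of the outer mask maps to mask1[0] = 2, so the merged result is
//   shufflevector <4 x i32> %v1, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
// which is one of the existing masks, so the merge is allowed.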
3157 auto *LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3158 auto *RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3159 if (LHSShuffle)
3160 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3161 !match(RHS, m_Poison()))
3162 LHSShuffle = nullptr;
3163 if (RHSShuffle)
3164 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3165 RHSShuffle = nullptr;
3166 if (!LHSShuffle && !RHSShuffle)
3167 return MadeChange ? &SVI : nullptr;
3168
3169 Value* LHSOp0 = nullptr;
3170 Value* LHSOp1 = nullptr;
3171 Value* RHSOp0 = nullptr;
3172 unsigned LHSOp0Width = 0;
3173 unsigned RHSOp0Width = 0;
3174 if (LHSShuffle) {
3175 LHSOp0 = LHSShuffle->getOperand(0);
3176 LHSOp1 = LHSShuffle->getOperand(1);
3177 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3178 }
3179 if (RHSShuffle) {
3180 RHSOp0 = RHSShuffle->getOperand(0);
3181 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3182 }
3183 Value* newLHS = LHS;
3184 Value* newRHS = RHS;
3185 if (LHSShuffle) {
3186 // case 1
3187 if (match(RHS, m_Poison())) {
3188 newLHS = LHSOp0;
3189 newRHS = LHSOp1;
3190 }
3191 // case 2 or 4
3192 else if (LHSOp0Width == LHSWidth) {
3193 newLHS = LHSOp0;
3194 }
3195 }
3196 // case 3 or 4
3197 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3198 newRHS = RHSOp0;
3199 }
3200 // case 4
3201 if (LHSOp0 == RHSOp0) {
3202 newLHS = LHSOp0;
3203 newRHS = nullptr;
3204 }
3205
3206 if (newLHS == LHS && newRHS == RHS)
3207 return MadeChange ? &SVI : nullptr;
3208
3209 ArrayRef<int> LHSMask;
3210 ArrayRef<int> RHSMask;
3211 if (newLHS != LHS)
3212 LHSMask = LHSShuffle->getShuffleMask();
3213 if (RHSShuffle && newRHS != RHS)
3214 RHSMask = RHSShuffle->getShuffleMask();
3215
3216 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3217 SmallVector<int, 16> newMask;
3218 bool isSplat = true;
3219 int SplatElt = -1;
3220 // Create a new mask for the new ShuffleVectorInst so that the new
3221 // ShuffleVectorInst is equivalent to the original one.
3222 for (unsigned i = 0; i < VWidth; ++i) {
3223 int eltMask;
3224 if (Mask[i] < 0) {
3225 // This element is a poison value.
3226 eltMask = -1;
3227 } else if (Mask[i] < (int)LHSWidth) {
3228 // This element is from left hand side vector operand.
3229 //
3230 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3231 // new mask value for the element.
3232 if (newLHS != LHS) {
3233 eltMask = LHSMask[Mask[i]];
3234 // If the value selected is a poison value, explicitly specify it
3235 // with a -1 mask value.
3236 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3237 eltMask = -1;
3238 } else
3239 eltMask = Mask[i];
3240 } else {
3241 // This element is from right hand side vector operand
3242 //
3243 // If the value selected is a poison value, explicitly specify it
3244 // with a -1 mask value. (case 1)
3245 if (match(RHS, m_Poison()))
3246 eltMask = -1;
3247 // If RHS is going to be replaced (case 3 or 4), calculate the
3248 // new mask value for the element.
3249 else if (newRHS != RHS) {
3250 eltMask = RHSMask[Mask[i]-LHSWidth];
3251 // If the value selected is a poison value, explicitly specify it
3252 // with a -1 mask value.
3253 if (eltMask >= (int)RHSOp0Width) {
3254 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3255 "should have been checked above");
3256 eltMask = -1;
3257 }
3258 } else
3259 eltMask = Mask[i]-LHSWidth;
3260
3261 // If LHS's width is changed, shift the mask value accordingly.
3262 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3263 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3264 // If newRHS == newLHS, we want to remap any references from newRHS to
3265 // newLHS so that we can properly identify splats that may occur due to
3266 // obfuscation across the two vectors.
3267 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3268 eltMask += newLHSWidth;
3269 }
3270
3271 // Check if this could still be a splat.
3272 if (eltMask >= 0) {
3273 if (SplatElt >= 0 && SplatElt != eltMask)
3274 isSplat = false;
3275 SplatElt = eltMask;
3276 }
3277
3278 newMask.push_back(eltMask);
3279 }
3280
3281 // If the result mask is equal to one of the original shuffle masks,
3282 // or is a splat, do the replacement.
3283 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3284 if (!newRHS)
3285 newRHS = PoisonValue::get(newLHS->getType());
3286 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3287 }
3288
3289 return MadeChange ? &SVI : nullptr;
3290}