//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
43
44#define DEBUG_TYPE "instcombine"
45
46using namespace llvm;
47using namespace PatternMatch;
48
49STATISTIC(NumAggregateReconstructionsSimplified,
50 "Number of aggregate reconstructions turned into reuse of the "
51 "original aggregate");
52
53/// Return true if the value is cheaper to scalarize than it is to leave as a
54/// vector operation. If the extract index \p EI is a constant integer then
55/// some operations may be cheap to scalarize.
56///
57/// FIXME: It's possible to create more instructions than previously existed.
58static bool cheapToScalarize(Value *V, Value *EI) {
60
61 // If we can pick a scalar constant value out of a vector, that is free.
62 if (auto *C = dyn_cast<Constant>(V))
63 return CEI || C->getSplatValue();
64
66 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
67 // Index needs to be lower than the minimum size of the vector, because
68 // for scalable vector, the vector size is known at run time.
69 return CEI->getValue().ult(EC.getKnownMinValue());
70 }
71
72 // An insertelement to the same constant index as our extract will simplify
73 // to the scalar inserted element. An insertelement to a different constant
74 // index is irrelevant to our extract.
76 return CEI;
77
78 if (match(V, m_OneUse(m_Load(m_Value()))))
79 return true;
80
81 if (match(V, m_OneUse(m_UnOp())))
82 return true;
83
84 Value *V0, *V1;
85 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
86 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
87 return true;
88
89 CmpPredicate UnusedPred;
90 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
91 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
92 return true;
93
94 return false;
95}
96
97// If we have a PHI node with a vector type that is only used to feed
98// itself and be an operand of extractelement at a constant location,
99// try to replace the PHI of the vector type with a PHI of a scalar type.
100Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101 PHINode *PN) {
102 SmallVector<Instruction *, 2> Extracts;
103 // The users we want the PHI to have are:
104 // 1) The EI ExtractElement (we already know this)
105 // 2) Possibly more ExtractElements with the same index.
106 // 3) Another operand, which will feed back into the PHI.
107 Instruction *PHIUser = nullptr;
108 for (auto *U : PN->users()) {
109 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
110 if (EI.getIndexOperand() == EU->getIndexOperand())
111 Extracts.push_back(EU);
112 else
113 return nullptr;
114 } else if (!PHIUser) {
115 PHIUser = cast<Instruction>(U);
116 } else {
117 return nullptr;
118 }
119 }
120
121 if (!PHIUser)
122 return nullptr;
123
124 // Verify that this PHI user has one use, which is the PHI itself,
125 // and that it is a binary operation which is cheap to scalarize.
126 // otherwise return nullptr.
127 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
128 !(isa<BinaryOperator>(PHIUser)) ||
129 !cheapToScalarize(PHIUser, EI.getIndexOperand()))
130 return nullptr;
131
132 // Create a scalar PHI node that will replace the vector PHI node
133 // just before the current PHI node.
134 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
136 // Scalarize each PHI operand. A switch may produce multiple edges from the
137 // same predecessor; reuse the scalar instruction for duplicate edges.
138 SmallDenseMap<BasicBlock *, Value *, 4> ScalarizedValues;
139 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
140 Value *PHIInVal = PN->getIncomingValue(i);
141 BasicBlock *inBB = PN->getIncomingBlock(i);
142 Value *Elt = EI.getIndexOperand();
143
144 // Reuse scalar value for duplicate edges from the same predecessor.
145 if (Value *Existing = ScalarizedValues.lookup(inBB)) {
146 scalarPHI->addIncoming(Existing, inBB);
147 continue;
148 }
149
150 Value *ScalarVal;
151 // If the operand is the PHI induction variable:
152 if (PHIInVal == PHIUser) {
153 // Scalarize the binary operation. One operand is the
154 // scalar PHI, and the other is extracted from the other
155 // vector operand.
156 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
157 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
160 B0->getOperand(opId)->getName() + ".Elt"),
161 B0->getIterator());
162 // Preserve operand order for binary operation to preserve semantics of
163 // non-commutative operations.
164 Value *FirstOp = (B0->getOperand(0) == PN) ? scalarPHI : Op;
165 Value *SecondOp = (B0->getOperand(0) == PN) ? Op : scalarPHI;
167 B0->getOpcode(), FirstOp, SecondOp, B0),
168 B0->getIterator());
169 } else {
170 // Scalarize PHI input:
171 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
172 // Insert the new instruction into the predecessor basic block.
173 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
174 BasicBlock::iterator InsertPos;
175 if (pos && !isa<PHINode>(pos)) {
176 InsertPos = ++pos->getIterator();
177 } else {
178 InsertPos = inBB->getFirstInsertionPt();
179 }
180
181 ScalarVal = InsertNewInstWith(newEI, InsertPos);
182 }
183
184 ScalarizedValues[inBB] = ScalarVal;
185 scalarPHI->addIncoming(ScalarVal, inBB);
186 }
187
188 for (auto *E : Extracts) {
189 replaceInstUsesWith(*E, scalarPHI);
190 // Add old extract to worklist for DCE.
192 }
193
194 return &EI;
195}
196
197Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
198 Value *X;
199 uint64_t ExtIndexC;
200 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
201 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
202 return nullptr;
203
204 ElementCount NumElts =
205 cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
206 Type *DestTy = Ext.getType();
207 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
208 bool IsBigEndian = DL.isBigEndian();
209
210 // If we are casting an integer to vector and extracting a portion, that is
211 // a shift-right and truncate.
212 if (X->getType()->isIntegerTy()) {
214 "Expected fixed vector type for bitcast from scalar integer");
215
216 // Big endian requires adjusting the extract index since MSB is at index 0.
217 // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
218 // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
219 if (IsBigEndian)
220 ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
221 unsigned ShiftAmountC = ExtIndexC * DestWidth;
222 if ((!ShiftAmountC ||
223 isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
224 Ext.getVectorOperand()->hasOneUse()) {
225 if (ShiftAmountC)
226 X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
227 if (DestTy->isFloatingPointTy()) {
228 Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
229 Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
230 return new BitCastInst(Trunc, DestTy);
231 }
232 return new TruncInst(X, DestTy);
233 }
234 }
235
236 if (!X->getType()->isVectorTy())
237 return nullptr;
238
239 // If this extractelement is using a bitcast from a vector of the same number
240 // of elements, see if we can find the source element from the source vector:
241 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
242 auto *SrcTy = cast<VectorType>(X->getType());
243 ElementCount NumSrcElts = SrcTy->getElementCount();
244 if (NumSrcElts == NumElts)
245 if (Value *Elt = findScalarElement(X, ExtIndexC))
246 return new BitCastInst(Elt, DestTy);
247
248 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
249 "Src and Dst must be the same sort of vector type");
250
251 // If the source elements are wider than the destination, try to shift and
252 // truncate a subset of scalar bits of an insert op.
253 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
254 Value *Scalar;
255 Value *Vec;
256 uint64_t InsIndexC;
257 if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
258 m_ConstantInt(InsIndexC))))
259 return nullptr;
260
261 // The extract must be from the subset of vector elements that we inserted
262 // into. Example: if we inserted element 1 of a <2 x i64> and we are
263 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
264 // of elements 4-7 of the bitcasted vector.
265 unsigned NarrowingRatio =
266 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
267
268 if (ExtIndexC / NarrowingRatio != InsIndexC) {
269 // Remove insertelement, if we don't use the inserted element.
270 // extractelement (bitcast (insertelement (Vec, b)), a) ->
271 // extractelement (bitcast (Vec), a)
272 // FIXME: this should be removed to SimplifyDemandedVectorElts,
273 // once scale vectors are supported.
274 if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
275 Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
276 return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
277 }
278 return nullptr;
279 }
280
281 // We are extracting part of the original scalar. How that scalar is
282 // inserted into the vector depends on the endian-ness. Example:
283 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
284 // +--+--+--+--+--+--+--+--+
285 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
286 // extelt <4 x i16> V', 3: | |S2|S3|
287 // +--+--+--+--+--+--+--+--+
288 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
289 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
290 // In this example, we must right-shift little-endian. Big-endian is just a
291 // truncate.
292 unsigned Chunk = ExtIndexC % NarrowingRatio;
293 if (IsBigEndian)
294 Chunk = NarrowingRatio - 1 - Chunk;
295
296 // Bail out if this is an FP vector to FP vector sequence. That would take
297 // more instructions than we started with unless there is no shift, and it
298 // may not be handled as well in the backend.
299 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
300 bool NeedDestBitcast = DestTy->isFloatingPointTy();
301 if (NeedSrcBitcast && NeedDestBitcast)
302 return nullptr;
303
304 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
305 unsigned ShAmt = Chunk * DestWidth;
306
307 // TODO: This limitation is more strict than necessary. We could sum the
308 // number of new instructions and subtract the number eliminated to know if
309 // we can proceed.
310 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
311 if (NeedSrcBitcast || NeedDestBitcast)
312 return nullptr;
313
314 if (NeedSrcBitcast) {
315 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
316 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
317 }
318
319 if (ShAmt) {
320 // Bail out if we could end with more instructions than we started with.
321 if (!Ext.getVectorOperand()->hasOneUse())
322 return nullptr;
323 Scalar = Builder.CreateLShr(Scalar, ShAmt);
324 }
325
326 if (NeedDestBitcast) {
327 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
328 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
329 }
330 return new TruncInst(Scalar, DestTy);
331 }
332
333 return nullptr;
334}
335
336/// Find elements of V demanded by UserInstr. If returns false, we were not able
337/// to determine all elements.
339 APInt &UnionUsedElts) {
340 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
341
342 switch (UserInstr->getOpcode()) {
343 case Instruction::ExtractElement: {
345 assert(EEI->getVectorOperand() == V);
347 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
348 UnionUsedElts.setBit(EEIIndexC->getZExtValue());
349 return true;
350 }
351 break;
352 }
353 case Instruction::ShuffleVector: {
354 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
355 unsigned MaskNumElts =
356 cast<FixedVectorType>(UserInstr->getType())->getNumElements();
357
358 for (auto I : llvm::seq(MaskNumElts)) {
359 unsigned MaskVal = Shuffle->getMaskValue(I);
360 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
361 continue;
362 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
363 UnionUsedElts.setBit(MaskVal);
364 if (Shuffle->getOperand(1) == V &&
365 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
366 UnionUsedElts.setBit(MaskVal - VWidth);
367 }
368 return true;
369 }
370 default:
371 break;
372 }
373
374 return false;
375}
376
377/// Find union of elements of V demanded by all its users.
378/// If it is known by querying findDemandedEltsBySingleUser that
379/// no user demands an element of V, then the corresponding bit
380/// remains unset in the returned value.
382 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
383
384 APInt UnionUsedElts(VWidth, 0);
385 for (const Use &U : V->uses()) {
386 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
387 if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
388 return APInt::getAllOnes(VWidth);
389 } else {
390 UnionUsedElts = APInt::getAllOnes(VWidth);
391 break;
392 }
393
394 if (UnionUsedElts.isAllOnes())
395 break;
396 }
397
398 return UnionUsedElts;
399}
400
401/// Given a constant index for a extractelement or insertelement instruction,
402/// return it with the canonical type if it isn't already canonical. We
403/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
404/// matter, we just want a consistent type to simplify CSE.
406 const unsigned IndexBW = IndexC->getBitWidth();
407 if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
408 return nullptr;
409 return ConstantInt::get(IndexC->getContext(),
410 IndexC->getValue().zextOrTrunc(64));
411}
412
414 Value *SrcVec = EI.getVectorOperand();
415 Value *Index = EI.getIndexOperand();
416 if (Value *V = simplifyExtractElementInst(SrcVec, Index,
417 SQ.getWithInstruction(&EI)))
418 return replaceInstUsesWith(EI, V);
419
420 // extractelt (select %x, %vec1, %vec2), %const ->
421 // select %x, %vec1[%const], %vec2[%const]
422 // TODO: Support constant folding of multiple select operands:
423 // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
424 // If the extractelement will for instance try to do out of bounds accesses
425 // because of the values of %c1 and/or %c2, the sequence could be optimized
426 // early. This is currently not possible because constant folding will reach
427 // an unreachable assertion if it doesn't find a constant operand.
429 if (SI->getCondition()->getType()->isIntegerTy() &&
431 if (Instruction *R = FoldOpIntoSelect(EI, SI))
432 return R;
433
434 // If extracting a specified index from the vector, see if we can recursively
435 // find a previously computed scalar that was inserted into the vector.
436 auto *IndexC = dyn_cast<ConstantInt>(Index);
437 bool HasKnownValidIndex = false;
438 if (IndexC) {
439 // Canonicalize type of constant indices to i64 to simplify CSE
440 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
441 return replaceOperand(EI, 1, NewIdx);
442
444 unsigned NumElts = EC.getKnownMinValue();
445 HasKnownValidIndex = IndexC->getValue().ult(NumElts);
446
448 Intrinsic::ID IID = II->getIntrinsicID();
449 // Index needs to be lower than the minimum size of the vector, because
450 // for scalable vector, the vector size is known at run time.
451 if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
452 Type *Ty = EI.getType();
453 unsigned BitWidth = Ty->getIntegerBitWidth();
454 Value *Idx;
455 // Return index when its value does not exceed the allowed limit
456 // for the element type of the vector.
457 // TODO: Truncate out-of-range values.
458 if (IndexC->getValue().getActiveBits() <= BitWidth)
459 Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
460 else
461 return nullptr;
462 return replaceInstUsesWith(EI, Idx);
463 }
464 }
465
466 // InstSimplify should handle cases where the index is invalid.
467 // For fixed-length vector, it's invalid to extract out-of-range element.
468 if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
469 return nullptr;
470
471 if (Instruction *I = foldBitcastExtElt(EI))
472 return I;
473
474 // If there's a vector PHI feeding a scalar use through this extractelement
475 // instruction, try to scalarize the PHI.
476 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
477 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
478 return ScalarPHI;
479 }
480
481 // If SrcVec is a subvector starting at index 0, extract from the
482 // wider source vector
483 Value *V;
484 if (match(SrcVec,
486 return ExtractElementInst::Create(V, Index);
487
488 // TODO come up with a n-ary matcher that subsumes both unary and
489 // binary matchers.
490 UnaryOperator *UO;
491 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
492 // extelt (unop X), Index --> unop (extelt X, Index)
493 Value *X = UO->getOperand(0);
494 Value *E = Builder.CreateExtractElement(X, Index);
496 }
497
498 // If the binop is not speculatable, we cannot hoist the extractelement if
499 // it may make the operand poison.
500 BinaryOperator *BO;
501 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
502 (HasKnownValidIndex ||
504 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
505 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
506 Value *E0 = Builder.CreateExtractElement(X, Index);
507 Value *E1 = Builder.CreateExtractElement(Y, Index);
508 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
509 }
510
511 Value *X, *Y;
512 CmpPredicate Pred;
513 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
514 cheapToScalarize(SrcVec, Index)) {
515 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
516 Value *E0 = Builder.CreateExtractElement(X, Index);
517 Value *E1 = Builder.CreateExtractElement(Y, Index);
518 CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
519 return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
520 SrcCmpInst);
521 }
522
523 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
524 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
525 // instsimplify already handled the case where the indices are constants
526 // and equal by value, if both are constants, they must not be the same
527 // value, extract from the pre-inserted value instead.
528 if (isa<Constant>(IE->getOperand(2)) && IndexC)
529 return replaceOperand(EI, 0, IE->getOperand(0));
530 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
531 auto *VecType = cast<VectorType>(GEP->getType());
532 ElementCount EC = VecType->getElementCount();
533 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
534 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
535 // Find out why we have a vector result - these are a few examples:
536 // 1. We have a scalar pointer and a vector of indices, or
537 // 2. We have a vector of pointers and a scalar index, or
538 // 3. We have a vector of pointers and a vector of indices, etc.
539 // Here we only consider combining when there is exactly one vector
540 // operand, since the optimization is less obviously a win due to
541 // needing more than one extractelements.
542
543 unsigned VectorOps =
544 llvm::count_if(GEP->operands(), [](const Value *V) {
545 return isa<VectorType>(V->getType());
546 });
547 if (VectorOps == 1) {
548 Value *NewPtr = GEP->getPointerOperand();
549 if (isa<VectorType>(NewPtr->getType()))
550 NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
551
553 for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
554 Value *Op = GEP->getOperand(I);
555 if (isa<VectorType>(Op->getType()))
556 NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
557 else
558 NewOps.push_back(Op);
559 }
560
562 GEP->getSourceElementType(), NewPtr, NewOps);
563 NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
564 return NewGEP;
565 }
566 }
567 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
568 int SplatIndex = getSplatIndex(SVI->getShuffleMask());
569 // We know the all-0 splat must be reading from the first operand, even
570 // in the case of scalable vectors (vscale is always > 0).
571 if (SplatIndex == 0)
572 return ExtractElementInst::Create(SVI->getOperand(0),
573 Builder.getInt64(0));
574
575 if (isa<FixedVectorType>(SVI->getType())) {
576 std::optional<int> SrcIdx;
577 // getSplatIndex returns -1 to mean not-found.
578 if (SplatIndex != -1)
579 SrcIdx = SplatIndex;
580 else if (ConstantInt *CI = dyn_cast<ConstantInt>(Index))
581 SrcIdx = SVI->getMaskValue(CI->getZExtValue());
582
583 if (SrcIdx) {
584 Value *Src;
585 unsigned LHSWidth =
586 cast<FixedVectorType>(SVI->getOperand(0)->getType())
587 ->getNumElements();
588
589 if (*SrcIdx < 0)
591 if (*SrcIdx < (int)LHSWidth)
592 Src = SVI->getOperand(0);
593 else {
594 *SrcIdx -= LHSWidth;
595 Src = SVI->getOperand(1);
596 }
597 Type *Int64Ty = Type::getInt64Ty(EI.getContext());
599 Src, ConstantInt::get(Int64Ty, *SrcIdx, false));
600 }
601 }
602 } else if (auto *CI = dyn_cast<CastInst>(I)) {
603 // Canonicalize extractelement(cast) -> cast(extractelement).
604 // Bitcasts can change the number of vector elements, and they cost
605 // nothing.
606 // If the CI has only one use, but that use is inside a loop, this
607 // canonicalization is not profitable because it would turn a vector
608 // operation into scalar operations inside the loop. Apply the transform
609 // when:
610 // - the index is constant and CI has one use, or
611 // - the CI and EI are in the same basic block, so the cast won't be sunk
612 // into a loop.
613 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast) &&
614 (EI.getParent() == CI->getParent() || isa<ConstantInt>(Index))) {
615 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
616 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
617 }
618 }
619 }
620
621 // Run demanded elements after other transforms as this can drop flags on
622 // binops. If there's two paths to the same final result, we prefer the
623 // one which doesn't force us to drop flags.
624 if (IndexC) {
626 unsigned NumElts = EC.getKnownMinValue();
627 // This instruction only demands the single element from the input vector.
628 // Skip for scalable type, the number of elements is unknown at
629 // compile-time.
630 if (!EC.isScalable() && NumElts != 1) {
631 // If the input vector has a single use, simplify it based on this use
632 // property.
633 if (SrcVec->hasOneUse()) {
634 APInt PoisonElts(NumElts, 0);
635 APInt DemandedElts(NumElts, 0);
636 DemandedElts.setBit(IndexC->getZExtValue());
637 if (Value *V =
638 SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
639 return replaceOperand(EI, 0, V);
640 } else {
641 // If the input vector has multiple uses, simplify it based on a union
642 // of all elements used.
643 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
644 if (!DemandedElts.isAllOnes()) {
645 APInt PoisonElts(NumElts, 0);
647 SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
648 true /* AllowMultipleUsers */)) {
649 if (V != SrcVec) {
650 Worklist.addValue(SrcVec);
651 SrcVec->replaceAllUsesWith(V);
652 return &EI;
653 }
654 }
655 }
656 }
657 }
658 }
659 return nullptr;
660}
661
662/// If V is a shuffle of values that ONLY returns elements from either LHS or
663/// RHS, return the shuffle mask and true. Otherwise, return false.
665 SmallVectorImpl<int> &Mask) {
666 assert(LHS->getType() == RHS->getType() &&
667 "Invalid CollectSingleShuffleElements");
668 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
669
670 if (match(V, m_Poison())) {
671 Mask.assign(NumElts, -1);
672 return true;
673 }
674
675 if (V == LHS) {
676 for (unsigned i = 0; i != NumElts; ++i)
677 Mask.push_back(i);
678 return true;
679 }
680
681 if (V == RHS) {
682 for (unsigned i = 0; i != NumElts; ++i)
683 Mask.push_back(i + NumElts);
684 return true;
685 }
686
688 // If this is an insert of an extract from some other vector, include it.
689 Value *VecOp = IEI->getOperand(0);
690 Value *ScalarOp = IEI->getOperand(1);
691 Value *IdxOp = IEI->getOperand(2);
692
693 if (!isa<ConstantInt>(IdxOp))
694 return false;
695 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
696
697 if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
698 // We can handle this if the vector we are inserting into is
699 // transitively ok.
700 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
701 // If so, update the mask to reflect the inserted poison.
702 Mask[InsertedIdx] = -1;
703 return true;
704 }
705 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
706 if (isa<ConstantInt>(EI->getOperand(1))) {
707 unsigned ExtractedIdx =
708 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
709 unsigned NumLHSElts =
710 cast<FixedVectorType>(LHS->getType())->getNumElements();
711
712 // This must be extracting from either LHS or RHS.
713 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
714 // We can handle this if the vector we are inserting into is
715 // transitively ok.
716 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
717 // If so, update the mask to reflect the inserted value.
718 if (EI->getOperand(0) == LHS) {
719 Mask[InsertedIdx % NumElts] = ExtractedIdx;
720 } else {
721 assert(EI->getOperand(0) == RHS);
722 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
723 }
724 return true;
725 }
726 }
727 }
728 }
729 }
730
731 return false;
732}
733
734/// If we have insertion into a vector that is wider than the vector that we
735/// are extracting from, try to widen the source vector to allow a single
736/// shufflevector to replace one or more insert/extract pairs.
738 ExtractElementInst *ExtElt,
739 InstCombinerImpl &IC) {
740 auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
741 auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
742 unsigned NumInsElts = InsVecType->getNumElements();
743 unsigned NumExtElts = ExtVecType->getNumElements();
744
745 // The inserted-to vector must be wider than the extracted-from vector.
746 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
747 NumExtElts >= NumInsElts)
748 return false;
749
750 Value *ExtVecOp = ExtElt->getVectorOperand();
751 // Bail out on constant vectors.
752 if (isa<ConstantData>(ExtVecOp))
753 return false;
754
755 // Create a shuffle mask to widen the extended-from vector using poison
756 // values. The mask selects all of the values of the original vector followed
757 // by as many poison values as needed to create a vector of the same length
758 // as the inserted-to vector.
759 SmallVector<int, 16> ExtendMask;
760 for (unsigned i = 0; i < NumExtElts; ++i)
761 ExtendMask.push_back(i);
762 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
763 ExtendMask.push_back(-1);
764
765 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
766 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
767 ? ExtVecOpInst->getParent()
768 : ExtElt->getParent();
769
770 // TODO: This restriction matches the basic block check below when creating
771 // new extractelement instructions. If that limitation is removed, this one
772 // could also be removed. But for now, we just bail out to ensure that we
773 // will replace the extractelement instruction that is feeding our
774 // insertelement instruction. This allows the insertelement to then be
775 // replaced by a shufflevector. If the insertelement is not replaced, we can
776 // induce infinite looping because there's an optimization for extractelement
777 // that will delete our widening shuffle. This would trigger another attempt
778 // here to create that shuffle, and we spin forever.
779 if (InsertionBlock != InsElt->getParent())
780 return false;
781
782 // TODO: This restriction matches the check in visitInsertElementInst() and
783 // prevents an infinite loop caused by not turning the extract/insert pair
784 // into a shuffle. We really should not need either check, but we're lacking
785 // folds for shufflevectors because we're afraid to generate shuffle masks
786 // that the backend can't handle.
787 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
788 return false;
789
790 auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
791
792 // Insert the new shuffle after the vector operand of the extract is defined
793 // (as long as it's not a PHI) or at the start of the basic block of the
794 // extract, so any subsequent extracts in the same basic block can use it.
795 // TODO: Insert before the earliest ExtractElementInst that is replaced.
796 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
797 WideVec->insertAfter(ExtVecOpInst->getIterator());
798 else
799 IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
800
801 // Replace extracts from the original narrow vector with extracts from the new
802 // wide vector.
803 for (User *U : ExtVecOp->users()) {
805 if (!OldExt || OldExt->getParent() != WideVec->getParent())
806 continue;
807 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
808 IC.InsertNewInstWith(NewExt, OldExt->getIterator());
809 IC.replaceInstUsesWith(*OldExt, NewExt);
810 // Add the old extracts to the worklist for DCE. We can't remove the
811 // extracts directly, because they may still be used by the calling code.
812 IC.addToWorklist(OldExt);
813 }
814
815 return true;
816}
817
818/// We are building a shuffle to create V, which is a sequence of insertelement,
819/// extractelement pairs. If PermittedRHS is set, then we must either use it or
820/// not rely on the second vector source. Return a std::pair containing the
821/// left and right vectors of the proposed shuffle (or 0), and set the Mask
822/// parameter as required.
823///
824/// Note: we intentionally don't try to fold earlier shuffles since they have
825/// often been chosen carefully to be efficiently implementable on the target.
826using ShuffleOps = std::pair<Value *, Value *>;
827
829 Value *PermittedRHS,
830 InstCombinerImpl &IC, bool &Rerun) {
831 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
832 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
833
834 if (match(V, m_Poison())) {
835 Mask.assign(NumElts, -1);
836 return std::make_pair(
837 PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
838 }
839
841 Mask.assign(NumElts, 0);
842 return std::make_pair(V, nullptr);
843 }
844
846 // If this is an insert of an extract from some other vector, include it.
847 Value *VecOp = IEI->getOperand(0);
848 Value *ScalarOp = IEI->getOperand(1);
849 Value *IdxOp = IEI->getOperand(2);
850
852 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
853 unsigned ExtractedIdx =
854 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
855 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
856
857 // Either the extracted from or inserted into vector must be RHSVec,
858 // otherwise we'd end up with a shuffle of three inputs.
859 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
860 Value *RHS = EI->getOperand(0);
861 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
862 assert(LR.second == nullptr || LR.second == RHS);
863
864 if (LR.first->getType() != RHS->getType()) {
865 // Although we are giving up for now, see if we can create extracts
866 // that match the inserts for another round of combining.
867 if (replaceExtractElements(IEI, EI, IC))
868 Rerun = true;
869
870 // We tried our best, but we can't find anything compatible with RHS
871 // further up the chain. Return a trivial shuffle.
872 for (unsigned i = 0; i < NumElts; ++i)
873 Mask[i] = i;
874 return std::make_pair(V, nullptr);
875 }
876
877 unsigned NumLHSElts =
878 cast<FixedVectorType>(RHS->getType())->getNumElements();
879 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
880 return std::make_pair(LR.first, RHS);
881 }
882
883 if (VecOp == PermittedRHS) {
884 // We've gone as far as we can: anything on the other side of the
885 // extractelement will already have been converted into a shuffle.
886 unsigned NumLHSElts =
888 ->getNumElements();
889 for (unsigned i = 0; i != NumElts; ++i)
890 Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
891 return std::make_pair(EI->getOperand(0), PermittedRHS);
892 }
893
894 // If this insertelement is a chain that comes from exactly these two
895 // vectors, return the vector and the effective shuffle.
896 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
897 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
898 Mask))
899 return std::make_pair(EI->getOperand(0), PermittedRHS);
900 }
901 }
902 }
903
904 // Otherwise, we can't do anything fancy. Return an identity vector.
905 for (unsigned i = 0; i != NumElts; ++i)
906 Mask.push_back(i);
907 return std::make_pair(V, nullptr);
908}
909
910/// Look for chain of insertvalue's that fully define an aggregate, and trace
911/// back the values inserted, see if they are all were extractvalue'd from
912/// the same source aggregate from the exact same element indexes.
913/// If they were, just reuse the source aggregate.
914/// This potentially deals with PHI indirections.
916 InsertValueInst &OrigIVI) {
917 Type *AggTy = OrigIVI.getType();
918 unsigned NumAggElts;
919 switch (AggTy->getTypeID()) {
920 case Type::StructTyID:
921 NumAggElts = AggTy->getStructNumElements();
922 break;
923 case Type::ArrayTyID:
924 NumAggElts = AggTy->getArrayNumElements();
925 break;
926 default:
927 llvm_unreachable("Unhandled aggregate type?");
928 }
929
930 // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
931 // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
932 // FIXME: any interesting patterns to be caught with larger limit?
933 assert(NumAggElts > 0 && "Aggregate should have elements.");
934 if (NumAggElts > 2)
935 return nullptr;
936
937 static constexpr auto NotFound = std::nullopt;
938 static constexpr auto FoundMismatch = nullptr;
939
940 // Try to find a value of each element of an aggregate.
941 // FIXME: deal with more complex, not one-dimensional, aggregate types
942 SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
943
944 // Do we know values for each element of the aggregate?
945 auto KnowAllElts = [&AggElts]() {
946 return !llvm::is_contained(AggElts, NotFound);
947 };
948
949 int Depth = 0;
950
951 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
952 // every element being overwritten twice, which should never happen.
953 static const int DepthLimit = 2 * NumAggElts;
954
955 // Recurse up the chain of `insertvalue` aggregate operands until either we've
956 // reconstructed full initializer or can't visit any more `insertvalue`'s.
957 for (InsertValueInst *CurrIVI = &OrigIVI;
958 Depth < DepthLimit && CurrIVI && !KnowAllElts();
959 CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
960 ++Depth) {
961 auto *InsertedValue =
962 dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
963 if (!InsertedValue)
964 return nullptr; // Inserted value must be produced by an instruction.
965
966 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
967
968 // Don't bother with more than single-level aggregates.
969 if (Indices.size() != 1)
970 return nullptr; // FIXME: deal with more complex aggregates?
971
972 // Now, we may have already previously recorded the value for this element
973 // of an aggregate. If we did, that means the CurrIVI will later be
974 // overwritten with the already-recorded value. But if not, let's record it!
975 std::optional<Instruction *> &Elt = AggElts[Indices.front()];
976 Elt = Elt.value_or(InsertedValue);
977
978 // FIXME: should we handle chain-terminating undef base operand?
979 }
980
981 // Was that sufficient to deduce the full initializer for the aggregate?
982 if (!KnowAllElts())
983 return nullptr; // Give up then.
984
985 // We now want to find the source[s] of the aggregate elements we've found.
986 // And with "source" we mean the original aggregate[s] from which
987 // the inserted elements were extracted. This may require PHI translation.
988
989 enum class AggregateDescription {
990 /// When analyzing the value that was inserted into an aggregate, we did
991 /// not manage to find defining `extractvalue` instruction to analyze.
992 NotFound,
993 /// When analyzing the value that was inserted into an aggregate, we did
994 /// manage to find defining `extractvalue` instruction[s], and everything
995 /// matched perfectly - aggregate type, element insertion/extraction index.
996 Found,
997 /// When analyzing the value that was inserted into an aggregate, we did
998 /// manage to find defining `extractvalue` instruction, but there was
999 /// a mismatch: either the source type from which the extraction was didn't
1000 /// match the aggregate type into which the insertion was,
1001 /// or the extraction/insertion channels mismatched,
1002 /// or different elements had different source aggregates.
1003 FoundMismatch
1004 };
1005 auto Describe = [](std::optional<Value *> SourceAggregate) {
1006 if (SourceAggregate == NotFound)
1007 return AggregateDescription::NotFound;
1008 if (*SourceAggregate == FoundMismatch)
1009 return AggregateDescription::FoundMismatch;
1010 return AggregateDescription::Found;
1011 };
1012
1013 // If an aggregate element is defined in UseBB, we can't use it in PredBB.
1014 bool EltDefinedInUseBB = false;
1015
1016 // Given the value \p Elt that was being inserted into element \p EltIdx of an
1017 // aggregate AggTy, see if \p Elt was originally defined by an
1018 // appropriate extractvalue (same element index, same aggregate type).
1019 // If found, return the source aggregate from which the extraction was.
1020 // If \p PredBB is provided, does PHI translation of an \p Elt first.
1021 auto FindSourceAggregate =
1022 [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
1023 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1024 // For now(?), only deal with, at most, a single level of PHI indirection.
1025 if (UseBB && PredBB) {
1026 Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
1027 if (Elt && Elt->getParent() == *UseBB)
1028 EltDefinedInUseBB = true;
1029 }
1030 // FIXME: deal with multiple levels of PHI indirection?
1031
1032 // Did we find an extraction?
1033 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
1034 if (!EVI)
1035 return NotFound;
1036
1037 Value *SourceAggregate = EVI->getAggregateOperand();
1038
1039 // Is the extraction from the same type into which the insertion was?
1040 if (SourceAggregate->getType() != AggTy)
1041 return FoundMismatch;
1042 // And the element index doesn't change between extraction and insertion?
1043 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
1044 return FoundMismatch;
1045
1046 return SourceAggregate; // AggregateDescription::Found
1047 };
1048
1049 // Given elements AggElts that were constructing an aggregate OrigIVI,
1050 // see if we can find appropriate source aggregate for each of the elements,
1051 // and see it's the same aggregate for each element. If so, return it.
1052 auto FindCommonSourceAggregate =
1053 [&](std::optional<BasicBlock *> UseBB,
1054 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1055 std::optional<Value *> SourceAggregate;
1056
1057 for (auto I : enumerate(AggElts)) {
1058 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1059 "We don't store nullptr in SourceAggregate!");
1060 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1061 (I.index() != 0) &&
1062 "SourceAggregate should be valid after the first element,");
1063
1064 // For this element, is there a plausible source aggregate?
1065 // FIXME: we could special-case undef element, IFF we know that in the
1066 // source aggregate said element isn't poison.
1067 std::optional<Value *> SourceAggregateForElement =
1068 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1069
1070 // Okay, what have we found? Does that correlate with previous findings?
1071
1072 // Regardless of whether or not we have previously found source
1073 // aggregate for previous elements (if any), if we didn't find one for
1074 // this element, passthrough whatever we have just found.
1075 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1076 return SourceAggregateForElement;
1077
1078 // Okay, we have found source aggregate for this element.
1079 // Let's see what we already know from previous elements, if any.
1080 switch (Describe(SourceAggregate)) {
1081 case AggregateDescription::NotFound:
1082 // This is apparently the first element that we have examined.
1083 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1084 continue; // Great, now look at next element.
1085 case AggregateDescription::Found:
1086 // We have previously already successfully examined other elements.
1087 // Is this the same source aggregate we've found for other elements?
1088 if (*SourceAggregateForElement != *SourceAggregate)
1089 return FoundMismatch;
1090 continue; // Still the same aggregate, look at next element.
1091 case AggregateDescription::FoundMismatch:
1092 llvm_unreachable("Can't happen. We would have early-exited then.");
1093 };
1094 }
1095
1096 assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1097 "Must be a valid Value");
1098 return *SourceAggregate;
1099 };
1100
1101 std::optional<Value *> SourceAggregate;
1102
1103 // Can we find the source aggregate without looking at predecessors?
1104 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
1105 /*PredBB=*/std::nullopt);
1106 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1107 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1108 return nullptr; // Conflicting source aggregates!
1109 ++NumAggregateReconstructionsSimplified;
1110 return replaceInstUsesWith(OrigIVI, *SourceAggregate);
1111 }
1112
1113 // Okay, apparently we need to look at predecessors.
1114
1115 // We should be smart about picking the "use" basic block, which will be the
1116 // merge point for aggregate, where we'll insert the final PHI that will be
1117 // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
1118 // We should look in which blocks each of the AggElts is being defined,
1119 // they all should be defined in the same basic block.
1120 BasicBlock *UseBB = nullptr;
1121
1122 for (const std::optional<Instruction *> &I : AggElts) {
1123 BasicBlock *BB = (*I)->getParent();
1124 // If it's the first instruction we've encountered, record the basic block.
1125 if (!UseBB) {
1126 UseBB = BB;
1127 continue;
1128 }
1129 // Otherwise, this must be the same basic block we've seen previously.
1130 if (UseBB != BB)
1131 return nullptr;
1132 }
1133
1134 // If *all* of the elements are basic-block-independent, meaning they are
1135 // either function arguments, or constant expressions, then if we didn't
1136 // handle them without predecessor-aware handling, we won't handle them now.
1137 if (!UseBB)
1138 return nullptr;
1139
1140 // If we didn't manage to find source aggregate without looking at
1141 // predecessors, and there are no predecessors to look at, then we're done.
1142 if (pred_empty(UseBB))
1143 return nullptr;
1144
1145 // Arbitrary predecessor count limit.
1146 static const int PredCountLimit = 64;
1147
1148 // Cache the (non-uniqified!) list of predecessors in a vector,
1149 // checking the limit at the same time for efficiency.
1150 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1151 for (BasicBlock *Pred : predecessors(UseBB)) {
1152 // Don't bother if there are too many predecessors.
1153 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1154 return nullptr;
1155 Preds.emplace_back(Pred);
1156 }
1157
1158 // For each predecessor, what is the source aggregate,
1159 // from which all the elements were originally extracted from?
1160 // Note that we want for the map to have stable iteration order!
1162 bool FoundSrcAgg = false;
1163 for (BasicBlock *Pred : Preds) {
1164 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1165 SourceAggregates.try_emplace(Pred);
1166 // Did we already evaluate this predecessor?
1167 if (!IV.second)
1168 continue;
1169
1170 // Let's hope that when coming from predecessor Pred, all elements of the
1171 // aggregate produced by OrigIVI must have been originally extracted from
1172 // the same aggregate. Is that so? Can we find said original aggregate?
1173 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1174 if (Describe(SourceAggregate) == AggregateDescription::Found) {
1175 FoundSrcAgg = true;
1176 IV.first->second = *SourceAggregate;
1177 } else {
1178 // If UseBB is the single successor of Pred, we can add InsertValue to
1179 // Pred.
1180 auto *BI = dyn_cast<UncondBrInst>(Pred->getTerminator());
1181 if (!BI)
1182 return nullptr;
1183 }
1184 }
1185
1186 if (!FoundSrcAgg)
1187 return nullptr;
1188
1189 // Do some sanity check if we need to add insertvalue into predecessors.
1190 auto OrigBB = OrigIVI.getParent();
1191 for (auto &It : SourceAggregates) {
1192 if (Describe(It.second) == AggregateDescription::Found)
1193 continue;
1194
1195 // Element is defined in UseBB, so it can't be used in predecessors.
1196 if (EltDefinedInUseBB)
1197 return nullptr;
1198
1199 // Do this transformation cross loop boundary may cause dead loop. So we
1200 // should avoid this situation. But LoopInfo is not generally available, we
1201 // must be conservative here.
1202 // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
1203 // can't be in inner loop.
1204 if (UseBB != OrigBB)
1205 return nullptr;
1206
1207 // Avoid constructing constant aggregate because constant value may expose
1208 // more optimizations.
1209 bool ConstAgg = true;
1210 for (auto Val : AggElts) {
1211 Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
1212 if (!isa<Constant>(Elt)) {
1213 ConstAgg = false;
1214 break;
1215 }
1216 }
1217 if (ConstAgg)
1218 return nullptr;
1219 }
1220
1221 // For predecessors without appropriate source aggregate, create one in the
1222 // predecessor.
1223 for (auto &It : SourceAggregates) {
1224 if (Describe(It.second) == AggregateDescription::Found)
1225 continue;
1226
1227 BasicBlock *Pred = It.first;
1228 Builder.SetInsertPoint(Pred->getTerminator());
1229 Value *V = PoisonValue::get(AggTy);
1230 for (auto [Idx, Val] : enumerate(AggElts)) {
1231 Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
1232 V = Builder.CreateInsertValue(V, Elt, Idx);
1233 }
1234
1235 It.second = V;
1236 }
1237
1238 // All good! Now we just need to thread the source aggregates here.
1239 // Note that we have to insert the new PHI here, ourselves, because we can't
1240 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1241 // Note that the same block can be a predecessor more than once,
1242 // and we need to preserve that invariant for the PHI node.
1244 Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
1245 auto *PHI =
1246 Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1247 for (BasicBlock *Pred : Preds)
1248 PHI->addIncoming(SourceAggregates[Pred], Pred);
1249
1250 ++NumAggregateReconstructionsSimplified;
1251 return replaceInstUsesWith(OrigIVI, PHI);
1252}
1253
1254/// Try to find redundant insertvalue instructions, like the following ones:
1255/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1256/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1257/// Here the second instruction inserts values at the same indices, as the
1258/// first one, making the first one redundant.
1259/// It should be transformed to:
1260/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1263 I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
1264 SQ.getWithInstruction(&I)))
1265 return replaceInstUsesWith(I, V);
1266
1267 bool IsRedundant = false;
1268 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1269
1270 // If there is a chain of insertvalue instructions (each of them except the
1271 // last one has only one use and it's another insertvalue insn from this
1272 // chain), check if any of the 'children' uses the same indices as the first
1273 // instruction. In this case, the first one is redundant.
1274 Value *V = &I;
1275 unsigned Depth = 0;
1276 while (V->hasOneUse() && Depth < 10) {
1277 User *U = V->user_back();
1278 auto UserInsInst = dyn_cast<InsertValueInst>(U);
1279 if (!UserInsInst || U->getOperand(0) != V)
1280 break;
1281 if (UserInsInst->getIndices() == FirstIndices) {
1282 IsRedundant = true;
1283 break;
1284 }
1285 V = UserInsInst;
1286 Depth++;
1287 }
1288
1289 if (IsRedundant)
1290 return replaceInstUsesWith(I, I.getOperand(0));
1291
1293 return NewI;
1294
1295 return nullptr;
1296}
1297
1299 // Can not analyze scalable type, the number of elements is not a compile-time
1300 // constant.
1302 return false;
1303
1304 int MaskSize = Shuf.getShuffleMask().size();
1305 int VecSize =
1306 cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1307
1308 // A vector select does not change the size of the operands.
1309 if (MaskSize != VecSize)
1310 return false;
1311
1312 // Each mask element must be undefined or choose a vector element from one of
1313 // the source operands without crossing vector lanes.
1314 for (int i = 0; i != MaskSize; ++i) {
1315 int Elt = Shuf.getMaskValue(i);
1316 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1317 return false;
1318 }
1319
1320 return true;
1321}
1322
1323/// Turn a chain of inserts that splats a value into an insert + shuffle:
1324/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1325/// shufflevector(insertelt(X, %k, 0), poison, zero)
1327 // We are interested in the last insert in a chain. So if this insert has a
1328 // single user and that user is an insert, bail.
1329 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1330 return nullptr;
1331
1332 VectorType *VecTy = InsElt.getType();
1333 // Can not handle scalable type, the number of elements is not a compile-time
1334 // constant.
1335 if (isa<ScalableVectorType>(VecTy))
1336 return nullptr;
1337 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1338
1339 // Do not try to do this for a one-element vector, since that's a nop,
1340 // and will cause an inf-loop.
1341 if (NumElements == 1)
1342 return nullptr;
1343
1344 Value *SplatVal = InsElt.getOperand(1);
1345 InsertElementInst *CurrIE = &InsElt;
1346 SmallBitVector ElementPresent(NumElements, false);
1347 InsertElementInst *FirstIE = nullptr;
1348
1349 // Walk the chain backwards, keeping track of which indices we inserted into,
1350 // until we hit something that isn't an insert of the splatted value.
1351 while (CurrIE) {
1352 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1353 if (!Idx || CurrIE->getOperand(1) != SplatVal)
1354 return nullptr;
1355
1356 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1357 // Check none of the intermediate steps have any additional uses, except
1358 // for the root insertelement instruction, which can be re-used, if it
1359 // inserts at position 0.
1360 if (CurrIE != &InsElt &&
1361 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1362 return nullptr;
1363
1364 ElementPresent[Idx->getZExtValue()] = true;
1365 FirstIE = CurrIE;
1366 CurrIE = NextIE;
1367 }
1368
1369 // If this is just a single insertelement (not a sequence), we are done.
1370 if (FirstIE == &InsElt)
1371 return nullptr;
1372
1373 // If we are not inserting into a poison vector, make sure we've seen an
1374 // insert into every element.
1375 // TODO: If the base vector is not undef, it might be better to create a splat
1376 // and then a select-shuffle (blend) with the base vector.
1377 if (!match(FirstIE->getOperand(0), m_Poison()))
1378 if (!ElementPresent.all())
1379 return nullptr;
1380
1381 // Create the insert + shuffle.
1382 Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
1383 PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1384 Constant *Zero = ConstantInt::get(Int64Ty, 0);
1385 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1386 FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
1387 InsElt.getIterator());
1388
1389 // Splat from element 0, but replace absent elements with poison in the mask.
1390 SmallVector<int, 16> Mask(NumElements, 0);
1391 for (unsigned i = 0; i != NumElements; ++i)
1392 if (!ElementPresent[i])
1393 Mask[i] = -1;
1394
1395 return new ShuffleVectorInst(FirstIE, Mask);
1396}
1397
1398/// Try to fold an insert element into an existing splat shuffle by changing
1399/// the shuffle's mask to include the index of this insert element.
1401 // Check if the vector operand of this insert is a canonical splat shuffle.
1402 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1403 if (!Shuf || !Shuf->isZeroEltSplat())
1404 return nullptr;
1405
1406 // Bail out early if shuffle is scalable type. The number of elements in
1407 // shuffle mask is unknown at compile-time.
1408 if (isa<ScalableVectorType>(Shuf->getType()))
1409 return nullptr;
1410
1411 // Check for a constant insertion index.
1412 uint64_t IdxC;
1413 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1414 return nullptr;
1415
1416 // Check if the splat shuffle's input is the same as this insert's scalar op.
1417 Value *X = InsElt.getOperand(1);
1418 Value *Op0 = Shuf->getOperand(0);
1419 if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1420 return nullptr;
1421
1422 // Replace the shuffle mask element at the index of this insert with a zero.
1423 // For example:
1424 // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1425 // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1426 unsigned NumMaskElts =
1427 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1428 SmallVector<int, 16> NewMask(NumMaskElts);
1429 for (unsigned i = 0; i != NumMaskElts; ++i)
1430 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1431
1432 return new ShuffleVectorInst(Op0, NewMask);
1433}
1434
1435/// Try to fold an extract+insert element into an existing identity shuffle by
1436/// changing the shuffle's mask to include the index of this insert element.
1438 // Check if the vector operand of this insert is an identity shuffle.
1439 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1440 if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
1441 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1442 return nullptr;
1443
1444 // Bail out early if shuffle is scalable type. The number of elements in
1445 // shuffle mask is unknown at compile-time.
1446 if (isa<ScalableVectorType>(Shuf->getType()))
1447 return nullptr;
1448
1449 // Check for a constant insertion index.
1450 uint64_t IdxC;
1451 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1452 return nullptr;
1453
1454 // Check if this insert's scalar op is extracted from the identity shuffle's
1455 // input vector.
1456 Value *Scalar = InsElt.getOperand(1);
1457 Value *X = Shuf->getOperand(0);
1458 if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1459 return nullptr;
1460
1461 // Replace the shuffle mask element at the index of this extract+insert with
1462 // that same index value.
1463 // For example:
1464 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1465 unsigned NumMaskElts =
1466 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1467 SmallVector<int, 16> NewMask(NumMaskElts);
1468 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1469 for (unsigned i = 0; i != NumMaskElts; ++i) {
1470 if (i != IdxC) {
1471 // All mask elements besides the inserted element remain the same.
1472 NewMask[i] = OldMask[i];
1473 } else if (OldMask[i] == (int)IdxC) {
1474 // If the mask element was already set, there's nothing to do
1475 // (demanded elements analysis may unset it later).
1476 return nullptr;
1477 } else {
1478 assert(OldMask[i] == PoisonMaskElem &&
1479 "Unexpected shuffle mask element for identity shuffle");
1480 NewMask[i] = IdxC;
1481 }
1482 }
1483
1484 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1485}
1486
1487/// If we have an insertelement instruction feeding into another insertelement
1488/// and the 2nd is inserting a constant into the vector, canonicalize that
1489/// constant insertion before the insertion of a variable:
1490///
1491/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1492/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1493///
1494/// This has the potential of eliminating the 2nd insertelement instruction
1495/// via constant folding of the scalar constant into a vector constant.
1497 InstCombiner::BuilderTy &Builder) {
1498 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1499 if (!InsElt1 || !InsElt1->hasOneUse())
1500 return nullptr;
1501
1502 Value *X, *Y;
1503 Constant *ScalarC;
1504 ConstantInt *IdxC1, *IdxC2;
1505 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1506 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1507 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1508 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1509 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1510 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1511 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1512 }
1513
1514 return nullptr;
1515}
1516
1517/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1518/// --> shufflevector X, CVec', Mask'
1520 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1521 // Bail out if the parent has more than one use. In that case, we'd be
1522 // replacing the insertelt with a shuffle, and that's not a clear win.
1523 if (!Inst || !Inst->hasOneUse())
1524 return nullptr;
1525 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1526 // The shuffle must have a constant vector operand. The insertelt must have
1527 // a constant scalar being inserted at a constant position in the vector.
1528 Constant *ShufConstVec, *InsEltScalar;
1529 uint64_t InsEltIndex;
1530 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1531 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1532 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1533 return nullptr;
1534
1535 // Adding an element to an arbitrary shuffle could be expensive, but a
1536 // shuffle that selects elements from vectors without crossing lanes is
1537 // assumed cheap.
1538 // If we're just adding a constant into that shuffle, it will still be
1539 // cheap.
1540 if (!isShuffleEquivalentToSelect(*Shuf))
1541 return nullptr;
1542
1543 // From the above 'select' check, we know that the mask has the same number
1544 // of elements as the vector input operands. We also know that each constant
1545 // input element is used in its lane and can not be used more than once by
1546 // the shuffle. Therefore, replace the constant in the shuffle's constant
1547 // vector with the insertelt constant. Replace the constant in the shuffle's
1548 // mask vector with the insertelt index plus the length of the vector
1549 // (because the constant vector operand of a shuffle is always the 2nd
1550 // operand).
1551 ArrayRef<int> Mask = Shuf->getShuffleMask();
1552 unsigned NumElts = Mask.size();
1553 SmallVector<Constant *, 16> NewShufElts(NumElts);
1554 SmallVector<int, 16> NewMaskElts(NumElts);
1555 for (unsigned I = 0; I != NumElts; ++I) {
1556 if (I == InsEltIndex) {
1557 NewShufElts[I] = InsEltScalar;
1558 NewMaskElts[I] = InsEltIndex + NumElts;
1559 } else {
1560 // Copy over the existing values.
1561 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1562 NewMaskElts[I] = Mask[I];
1563 }
1564
1565 // Bail if we failed to find an element.
1566 if (!NewShufElts[I])
1567 return nullptr;
1568 }
1569
1570 // Create new operands for a shuffle that includes the constant of the
1571 // original insertelt. The old shuffle will be dead now.
1572 return new ShuffleVectorInst(Shuf->getOperand(0),
1573 ConstantVector::get(NewShufElts), NewMaskElts);
1574 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1575 // Transform sequences of insertelements ops with constant data/indexes into
1576 // a single shuffle op.
1577 // Can not handle scalable type, the number of elements needed to create
1578 // shuffle mask is not a compile-time constant.
1579 if (isa<ScalableVectorType>(InsElt.getType()))
1580 return nullptr;
1581 unsigned NumElts =
1582 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1583
1584 uint64_t InsertIdx[2];
1585 Constant *Val[2];
1586 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1587 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1588 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1589 !match(IEI->getOperand(1), m_Constant(Val[1])))
1590 return nullptr;
1591 SmallVector<Constant *, 16> Values(NumElts);
1592 SmallVector<int, 16> Mask(NumElts);
1593 auto ValI = std::begin(Val);
1594 // Generate new constant vector and mask.
1595 // We have 2 values/masks from the insertelements instructions. Insert them
1596 // into new value/mask vectors.
1597 for (uint64_t I : InsertIdx) {
1598 if (!Values[I]) {
1599 Values[I] = *ValI;
1600 Mask[I] = NumElts + I;
1601 }
1602 ++ValI;
1603 }
1604 // Remaining values are filled with 'poison' values.
1605 for (unsigned I = 0; I < NumElts; ++I) {
1606 if (!Values[I]) {
1607 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1608 Mask[I] = I;
1609 }
1610 }
1611 // Create new operands for a shuffle that includes the constant of the
1612 // original insertelt.
1613 return new ShuffleVectorInst(IEI->getOperand(0),
1614 ConstantVector::get(Values), Mask);
1615 }
1616 return nullptr;
1617}
1618
1619/// If both the base vector and the inserted element are extended from the same
1620/// type, do the insert element in the narrow source type followed by extend.
1621/// TODO: This can be extended to include other cast opcodes, but particularly
1622/// if we create a wider insertelement, make sure codegen is not harmed.
1624 InstCombiner::BuilderTy &Builder) {
1625 // We are creating a vector extend. If the original vector extend has another
1626 // use, that would mean we end up with 2 vector extends, so avoid that.
1627 // TODO: We could ease the use-clause to "if at least one op has one use"
1628 // (assuming that the source types match - see next TODO comment).
1629 Value *Vec = InsElt.getOperand(0);
1630 if (!Vec->hasOneUse())
1631 return nullptr;
1632
1633 Value *Scalar = InsElt.getOperand(1);
1634 Value *X, *Y;
1635 CastInst::CastOps CastOpcode;
1636 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1637 CastOpcode = Instruction::FPExt;
1638 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1639 CastOpcode = Instruction::SExt;
1640 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1641 CastOpcode = Instruction::ZExt;
1642 else
1643 return nullptr;
1644
1645 // TODO: We can allow mismatched types by creating an intermediate cast.
1646 if (X->getType()->getScalarType() != Y->getType())
1647 return nullptr;
1648
1649 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1650 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1651 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1652}
1653
1654/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1655/// try to convert to a single insert with appropriate bitcasts.
1657 bool IsBigEndian,
1658 InstCombiner::BuilderTy &Builder) {
1659 Value *VecOp = InsElt.getOperand(0);
1660 Value *ScalarOp = InsElt.getOperand(1);
1661 Value *IndexOp = InsElt.getOperand(2);
1662
1663 // Pattern depends on endian because we expect lower index is inserted first.
1664 // Big endian:
1665 // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
1666 // Little endian:
1667 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1668 // Note: It is not safe to do this transform with an arbitrary base vector
1669 // because the bitcast of that vector to fewer/larger elements could
1670 // allow poison to spill into an element that was not poison before.
1671 // TODO: Detect smaller fractions of the scalar.
1672 // TODO: One-use checks are conservative.
1673 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1674 Value *Scalar0, *BaseVec;
1675 uint64_t Index0, Index1;
1676 if (!VTy || (VTy->getNumElements() & 1) ||
1677 !match(IndexOp, m_ConstantInt(Index1)) ||
1678 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1679 m_ConstantInt(Index0))) ||
1680 !match(BaseVec, m_Undef()))
1681 return nullptr;
1682
1683 // The first insert must be to the index one less than this one, and
1684 // the first insert must be to an even index.
1685 if (Index0 + 1 != Index1 || Index0 & 1)
1686 return nullptr;
1687
1688 // For big endian, the high half of the value should be inserted first.
1689 // For little endian, the low half of the value should be inserted first.
1690 Value *X;
1691 uint64_t ShAmt;
1692 if (IsBigEndian) {
1693 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1694 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1695 return nullptr;
1696 } else {
1697 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1698 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1699 return nullptr;
1700 }
1701
1702 Type *SrcTy = X->getType();
1703 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1704 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1705 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1706 return nullptr;
1707
1708 // Bitcast the base vector to a vector type with the source element type.
1709 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1710 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1711
1712 // Scale the insert index for a vector with half as many elements.
1713 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1714 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1715 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1716 return new BitCastInst(NewInsert, VTy);
1717}
1718
// Visitor for insertelement: first try instsimplify, then a sequence of
// canonicalizations and folds (to bitcasts, shuffles, splats). Returns the
// replacement instruction, or nullptr if no transform applies.
1720 Value *VecOp = IE.getOperand(0);
1721 Value *ScalarOp = IE.getOperand(1);
1722 Value *IdxOp = IE.getOperand(2);
1723
1724 if (auto *V = simplifyInsertElementInst(
1725 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1726 return replaceInstUsesWith(IE, V);
1727
1728 // Canonicalize type of constant indices to i64 to simplify CSE
1729 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1730 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1731 return replaceOperand(IE, 2, NewIdx);
1732
// If this insert consumes a one-use insert with a larger constant index,
// swap them so that constant-index inserts occur in ascending index order.
1733 Value *BaseVec, *OtherScalar;
1734 uint64_t OtherIndexVal;
1735 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1736 m_Value(OtherScalar),
1737 m_ConstantInt(OtherIndexVal)))) &&
1738 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1739 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1740 return InsertElementInst::Create(NewIns, OtherScalar,
1741 Builder.getInt64(OtherIndexVal));
1742 }
1743 }
1744
1745 // If the scalar is bitcast and inserted into undef, do the insert in the
1746 // source type followed by bitcast.
1747 // TODO: Generalize for insert into any constant, not just undef?
1748 Value *ScalarSrc;
1749 if (match(VecOp, m_Undef()) &&
1750 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1751 (ScalarSrc->getType()->isIntegerTy() ||
1752 ScalarSrc->getType()->isFloatingPointTy())) {
1753 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1754 // bitcast (inselt undef, ScalarSrc, IdxOp)
1755 Type *ScalarTy = ScalarSrc->getType();
1756 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
// Preserve poison-ness of the base vector in the new base constant.
1757 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1758 : UndefValue::get(VecTy);
1759 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1760 return new BitCastInst(NewInsElt, IE.getType());
1761 }
1762
1763 // If the vector and scalar are both bitcast from the same element type, do
1764 // the insert in that source type followed by bitcast.
1765 Value *VecSrc;
1766 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1767 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1768 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1769 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1770 cast<VectorType>(VecSrc->getType())->getElementType() ==
1771 ScalarSrc->getType()) {
1772 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1773 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1774 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1775 return new BitCastInst(NewInsElt, IE.getType());
1776 }
1777
1778 // If the inserted element was extracted from some other fixed-length vector
1779 // and both indexes are valid constants, try to turn this into a shuffle.
1780 // Can not handle scalable vector type, the number of elements needed to
1781 // create shuffle mask is not a compile-time constant.
1782 uint64_t InsertedIdx, ExtractedIdx;
1783 Value *ExtVecOp;
1784 if (isa<FixedVectorType>(IE.getType()) &&
1785 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1786 match(ScalarOp,
1787 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1788 isa<FixedVectorType>(ExtVecOp->getType()) &&
1789 ExtractedIdx <
1790 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1791 // TODO: Looking at the user(s) to determine if this insert is a
1792 // fold-to-shuffle opportunity does not match the usual instcombine
1793 // constraints. We should decide if the transform is worthy based only
1794 // on this instruction and its operands, but that may not work currently.
1795 //
1796 // Here, we are trying to avoid creating shuffles before reaching
1797 // the end of a chain of extract-insert pairs. This is complicated because
1798 // we do not generally form arbitrary shuffle masks in instcombine
1799 // (because those may codegen poorly), but collectShuffleElements() does
1800 // exactly that.
1801 //
1802 // The rules for determining what is an acceptable target-independent
1803 // shuffle mask are fuzzy because they evolve based on the backend's
1804 // capabilities and real-world impact.
// Only treat this insert as the root of a chain if its single user is not
// itself another insertelement (i.e. we are at the end of the chain).
1805 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1806 if (!Insert.hasOneUse())
1807 return true;
1808 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1809 if (!InsertUser)
1810 return true;
1811 return false;
1812 };
1813
1814 // Try to form a shuffle from a chain of extract-insert ops.
1815 if (isShuffleRootCandidate(IE)) {
1816 bool Rerun = true;
1817 while (Rerun) {
1818 Rerun = false;
1819
1821 ShuffleOps LR =
1822 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1823
1824 // The proposed shuffle may be trivial, in which case we shouldn't
1825 // perform the combine.
1826 if (LR.first != &IE && LR.second != &IE) {
1827 // We now have a shuffle of LHS, RHS, Mask.
1828 if (LR.second == nullptr)
1829 LR.second = PoisonValue::get(LR.first->getType());
1830 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1831 }
1832 }
1833 }
1834 }
1835
// Run demanded-vector-elements simplification over the whole result; it may
// return a simpler value, or modify the instruction in place (V == &IE).
1836 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1837 unsigned VWidth = VecTy->getNumElements();
1838 APInt PoisonElts(VWidth, 0);
1839 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1840 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1841 PoisonElts)) {
1842 if (V != &IE)
1843 return replaceInstUsesWith(IE, V);
1844 return &IE;
1845 }
1846 }
1847
// Finally, try a series of targeted insertelement folds; the first one that
// fires yields the replacement instruction.
1849 return Shuf;
1850
1851 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1852 return NewInsElt;
1853
1854 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1855 return Broadcast;
1856
1858 return Splat;
1859
1860 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1861 return IdentityShuf;
1862
1863 if (Instruction *Ext = narrowInsElt(IE, Builder))
1864 return Ext;
1865
1866 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1867 return Ext;
1868
1869 return nullptr;
1870}
1871
1872/// Return true if we can evaluate the specified expression tree if the vector
1873/// elements were shuffled in a different order.
1875 unsigned Depth = 5) {
1876 // We can always reorder the elements of a constant.
1877 if (isa<Constant>(V))
1878 return true;
1879
1880 // We won't reorder vector arguments. No IPO here.
1882 if (!I) return false;
1883
1884 // Two users may expect different orders of the elements. Don't try it.
1885 if (!I->hasOneUse())
1886 return false;
1887
1888 if (Depth == 0) return false;
1889
1890 switch (I->getOpcode()) {
1891 case Instruction::UDiv:
1892 case Instruction::SDiv:
1893 case Instruction::URem:
1894 case Instruction::SRem:
1895 // Propagating an undefined shuffle mask element to integer div/rem is not
1896 // allowed because those opcodes can create immediate undefined behavior
1897 // from an undefined element in an operand.
1898 if (llvm::is_contained(Mask, -1))
1899 return false;
1900 [[fallthrough]];
1901 case Instruction::Add:
1902 case Instruction::FAdd:
1903 case Instruction::Sub:
1904 case Instruction::FSub:
1905 case Instruction::Mul:
1906 case Instruction::FMul:
1907 case Instruction::FDiv:
1908 case Instruction::FRem:
1909 case Instruction::Shl:
1910 case Instruction::LShr:
1911 case Instruction::AShr:
1912 case Instruction::And:
1913 case Instruction::Or:
1914 case Instruction::Xor:
1915 case Instruction::ICmp:
1916 case Instruction::FCmp:
1917 case Instruction::Trunc:
1918 case Instruction::ZExt:
1919 case Instruction::SExt:
1920 case Instruction::FPToUI:
1921 case Instruction::FPToSI:
1922 case Instruction::UIToFP:
1923 case Instruction::SIToFP:
1924 case Instruction::FPTrunc:
1925 case Instruction::FPExt:
1926 case Instruction::GetElementPtr: {
1927 // Bail out if we would create longer vector ops. We could allow creating
1928 // longer vector ops, but that may result in more expensive codegen.
1929 Type *ITy = I->getType();
1930 if (ITy->isVectorTy() &&
1931 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1932 return false;
1933 for (Value *Operand : I->operands()) {
1934 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1935 return false;
1936 }
1937 return true;
1938 }
1939 case Instruction::InsertElement: {
1940 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1941 if (!CI) return false;
1942 int ElementNumber = CI->getLimitedValue();
1943
1944 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1945 // can't put an element into multiple indices.
1946 bool SeenOnce = false;
1947 for (int I : Mask) {
1948 if (I == ElementNumber) {
1949 if (SeenOnce)
1950 return false;
1951 SeenOnce = true;
1952 }
1953 }
1954 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1955 }
1956 }
1957 return false;
1958}
1959
1960/// Rebuild a new instruction just like 'I' but with the new operands given.
1961/// In the event of type mismatch, the type of the operands is correct.
1963 IRBuilderBase &Builder) {
1964 Builder.SetInsertPoint(I);
1965 switch (I->getOpcode()) {
1966 case Instruction::Add:
1967 case Instruction::FAdd:
1968 case Instruction::Sub:
1969 case Instruction::FSub:
1970 case Instruction::Mul:
1971 case Instruction::FMul:
1972 case Instruction::UDiv:
1973 case Instruction::SDiv:
1974 case Instruction::FDiv:
1975 case Instruction::URem:
1976 case Instruction::SRem:
1977 case Instruction::FRem:
1978 case Instruction::Shl:
1979 case Instruction::LShr:
1980 case Instruction::AShr:
1981 case Instruction::And:
1982 case Instruction::Or:
1983 case Instruction::Xor: {
1985 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1986 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1987 NewOps[0], NewOps[1]);
1988 if (auto *NewI = dyn_cast<Instruction>(New)) {
1990 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1991 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1992 }
1994 NewI->setIsExact(BO->isExact());
1995 }
1996 if (isa<FPMathOperator>(BO))
1997 NewI->copyFastMathFlags(I);
1998 }
1999 return New;
2000 }
2001 case Instruction::ICmp:
2002 assert(NewOps.size() == 2 && "icmp with #ops != 2");
2003 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
2004 NewOps[1]);
2005 case Instruction::FCmp:
2006 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
2007 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
2008 NewOps[1]);
2009 case Instruction::Trunc:
2010 case Instruction::ZExt:
2011 case Instruction::SExt:
2012 case Instruction::FPToUI:
2013 case Instruction::FPToSI:
2014 case Instruction::UIToFP:
2015 case Instruction::SIToFP:
2016 case Instruction::FPTrunc:
2017 case Instruction::FPExt: {
2018 // It's possible that the mask has a different number of elements from
2019 // the original cast. We recompute the destination type to match the mask.
2020 Type *DestTy = VectorType::get(
2021 I->getType()->getScalarType(),
2022 cast<VectorType>(NewOps[0]->getType())->getElementCount());
2023 assert(NewOps.size() == 1 && "cast with #ops != 1");
2024 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
2025 DestTy);
2026 }
2027 case Instruction::GetElementPtr: {
2028 Value *Ptr = NewOps[0];
2029 ArrayRef<Value*> Idx = NewOps.slice(1);
2030 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
2031 Ptr, Idx, "",
2032 cast<GEPOperator>(I)->getNoWrapFlags());
2033 }
2034 }
2035 llvm_unreachable("failed to rebuild vector instructions");
2036}
2037
// Rebuild the expression rooted at V so that its vector elements occur in the
// order given by Mask. Callers are expected to have validated V with
// canEvaluateShuffled() first; unsupported opcodes hit the unreachable below.
2039 IRBuilderBase &Builder) {
2040 // Mask.size() does not need to be equal to the number of vector elements.
2041
2042 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2043 Type *EltTy = V->getType()->getScalarType();
2044
// Poison/undef/zero constants only need their length adjusted to the mask.
2045 if (isa<PoisonValue>(V))
2046 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
2047
2048 if (match(V, m_Undef()))
2049 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
2050
2052 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
2053
// Any other constant is reordered by a constant shuffle.
2054 if (Constant *C = dyn_cast<Constant>(V))
2056 Mask);
2057
2059 switch (I->getOpcode()) {
2060 case Instruction::Add:
2061 case Instruction::FAdd:
2062 case Instruction::Sub:
2063 case Instruction::FSub:
2064 case Instruction::Mul:
2065 case Instruction::FMul:
2066 case Instruction::UDiv:
2067 case Instruction::SDiv:
2068 case Instruction::FDiv:
2069 case Instruction::URem:
2070 case Instruction::SRem:
2071 case Instruction::FRem:
2072 case Instruction::Shl:
2073 case Instruction::LShr:
2074 case Instruction::AShr:
2075 case Instruction::And:
2076 case Instruction::Or:
2077 case Instruction::Xor:
2078 case Instruction::ICmp:
2079 case Instruction::FCmp:
2080 case Instruction::Trunc:
2081 case Instruction::ZExt:
2082 case Instruction::SExt:
2083 case Instruction::FPToUI:
2084 case Instruction::FPToSI:
2085 case Instruction::UIToFP:
2086 case Instruction::SIToFP:
2087 case Instruction::FPTrunc:
2088 case Instruction::FPExt:
2089 case Instruction::Select:
2090 case Instruction::GetElementPtr: {
// If the mask changes the vector length, or any operand changed after
// recursion, the instruction must be rebuilt with the new operands.
2092 bool NeedsRebuild =
2093 (Mask.size() !=
2094 cast<FixedVectorType>(I->getType())->getNumElements());
2095 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2096 Value *V;
2097 // Recursively call evaluateInDifferentElementOrder on vector arguments
2098 // as well. E.g. GetElementPtr may have scalar operands even if the
2099 // return value is a vector, so we need to examine the operand type.
2100 if (I->getOperand(i)->getType()->isVectorTy())
2101 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
2102 else
2103 V = I->getOperand(i);
2104 NewOps.push_back(V);
2105 NeedsRebuild |= (V != I->getOperand(i));
2106 }
2107 if (NeedsRebuild)
2108 return buildNew(I, NewOps, Builder);
2109 return I;
2110 }
2111 case Instruction::InsertElement: {
2112 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2113
2114 // The insertelement was inserting at Element. Figure out which element
2115 // that becomes after shuffling. The answer is guaranteed to be unique
2116 // by CanEvaluateShuffled.
2117 bool Found = false;
2118 int Index = 0;
2119 for (int e = Mask.size(); Index != e; ++Index) {
2120 if (Mask[Index] == Element) {
2121 Found = true;
2122 break;
2123 }
2124 }
2125
2126 // If element is not in Mask, no need to handle the operand 1 (element to
2127 // be inserted). Just evaluate values in operand 0 according to Mask.
2128 if (!Found)
2129 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2130
2131 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2132 Builder);
2133 Builder.SetInsertPoint(I);
2134 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2135 }
2136 }
2137 llvm_unreachable("failed to reorder elements of vector instruction!");
2138}
2139
2140// Returns true if the shuffle is extracting a contiguous range of values from
2141// LHS, for example:
2142// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2143// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2144// Shuffles to: |EE|FF|GG|HH|
2145// +--+--+--+--+
2147 ArrayRef<int> Mask) {
2148 unsigned LHSElems =
2149 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2150 unsigned MaskElems = Mask.size();
2151 unsigned BegIdx = Mask.front();
2152 unsigned EndIdx = Mask.back();
2153 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2154 return false;
2155 for (unsigned I = 0; I != MaskElems; ++I)
2156 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2157 return false;
2158 return true;
2159}
2160
2161/// These are the ingredients in an alternate form binary operator as described
2162/// below.
2168 Value *V0 = nullptr, Value *V1 = nullptr) :
2169 Opcode(Opc), Op0(V0), Op1(V1) {}
2170 operator bool() const { return Opcode != 0; }
2171};
2172
2173/// Binops may be transformed into binops with different opcodes and operands.
2174/// Reverse the usual canonicalization to enable folds with the non-canonical
2175/// form of the binop. If a transform is possible, return the elements of the
2176/// new binop. If not, return invalid elements.
2178 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2179 Type *Ty = BO->getType();
2180 switch (BO->getOpcode()) {
2181 case Instruction::Shl: {
2182 // shl X, C --> mul X, (1 << C)
2183 Constant *C;
2184 if (match(BO1, m_ImmConstant(C))) {
2186 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2187 assert(ShlOne && "Constant folding of immediate constants failed");
2188 return {Instruction::Mul, BO0, ShlOne};
2189 }
2190 break;
2191 }
2192 case Instruction::Or: {
2193 // or disjoin X, C --> add X, C
2194 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2195 return {Instruction::Add, BO0, BO1};
2196 break;
2197 }
2198 case Instruction::Sub:
2199 // sub 0, X --> mul X, -1
2200 if (match(BO0, m_ZeroInt()))
2201 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2202 break;
2203 default:
2204 break;
2205 }
2206 return {};
2207}
2208
2209/// A select shuffle of a select shuffle with a shared operand can be reduced
2210/// to a single select shuffle. This is an obvious improvement in IR, and the
2211/// backend is expected to lower select shuffles efficiently.
2213 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2214
2215 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2217 Shuf.getShuffleMask(Mask);
2218 unsigned NumElts = Mask.size();
2219
2220 // Canonicalize a select shuffle with common operand as Op1.
2221 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2222 if (ShufOp && ShufOp->isSelect() &&
2223 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2224 std::swap(Op0, Op1);
2226 }
2227
2228 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2229 if (!ShufOp || !ShufOp->isSelect() ||
2230 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2231 return nullptr;
2232
2233 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2235 ShufOp->getShuffleMask(Mask1);
2236 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2237
2238 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2239 if (Y == Op0) {
2240 std::swap(X, Y);
2242 }
2243
2244 // If the mask chooses from X (operand 0), it stays the same.
2245 // If the mask chooses from the earlier shuffle, the other mask value is
2246 // transferred to the combined select shuffle:
2247 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2248 SmallVector<int, 16> NewMask(NumElts);
2249 for (unsigned i = 0; i != NumElts; ++i)
2250 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2251
2252 // A select mask with undef elements might look like an identity mask.
2253 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2254 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2255 "Unexpected shuffle mask");
2256 return new ShuffleVectorInst(X, Y, NewMask);
2257}
2258
// Fold a select shuffle of (binop X, C) with X itself into a single binop by
// placing identity constants in the lanes that must pass X through unchanged.
2260 const SimplifyQuery &SQ) {
2261 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2262
2263 // Are we shuffling together some value and that same value after it has been
2264 // modified by a binop with a constant?
2265 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2266 Constant *C;
2267 bool Op0IsBinop;
2268 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2269 Op0IsBinop = true;
2270 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2271 Op0IsBinop = false;
2272 else
2273 return nullptr;
2274
2275 // The identity constant for a binop leaves a variable operand unchanged. For
2276 // a vector, this is a splat of something like 0, -1, or 1.
2277 // If there's no identity constant for this binop, we're done.
2278 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2279 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2280 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2281 if (!IdC)
2282 return nullptr;
2283
// X is the plain (non-binop) operand that must survive unchanged.
2284 Value *X = Op0IsBinop ? Op1 : Op0;
2285
2286 // Prevent folding in the case the non-binop operand might have NaN values.
2287 // If X can have NaN elements then we have that the floating point math
2288 // operation in the transformed code may not preserve the exact NaN
2289 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2290 // This makes the transformation incorrect since the original program would
2291 // have preserved the exact NaN bit-pattern.
2292 // Avoid the folding if X can have NaN elements.
2293 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2294 !isKnownNeverNaN(X, SQ))
2295 return nullptr;
2296
2297 // Shuffle identity constants into the lanes that return the original value.
2298 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2299 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2300 // The existing binop constant vector remains in the same operand position.
2301 ArrayRef<int> Mask = Shuf.getShuffleMask();
2302 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2304
// div/rem/shift with a shuffled-in undef lane could introduce UB or poison;
// replace unsafe constant elements before building the new binop.
2305 bool MightCreatePoisonOrUB =
2307 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2308 if (MightCreatePoisonOrUB)
2309 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2310
2311 // shuf (bop X, C), X, M --> bop X, C'
2312 // shuf X, (bop X, C), M --> bop X, C'
2313 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2314 NewBO->copyIRFlags(BO);
2315
2316 // An undef shuffle mask element may propagate as an undef constant element in
2317 // the new binop. That would produce poison where the original code might not.
2318 // If we already made a safe constant, then there's no danger.
2319 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2321 return NewBO;
2322}
2323
2324/// If we have an insert of a scalar to a non-zero element of an undefined
2325/// vector and then shuffle that value, that's the same as inserting to the zero
2326/// element and shuffling. Splatting from the zero element is recognized as the
2327/// canonical form of splat.
2329 InstCombiner::BuilderTy &Builder) {
2330 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2331 ArrayRef<int> Mask = Shuf.getShuffleMask();
2332 Value *X;
2333 uint64_t IndexC;
2334
2335 // Match a shuffle that is a splat to a non-zero element.
2337 m_ConstantInt(IndexC)))) ||
2338 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2339 return nullptr;
2340
2341 // Insert into element 0 of a poison vector.
2342 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2343 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2344
2345 // Splat from element 0. Any mask element that is poison remains poison.
2346 // For example:
2347 // shuf (inselt poison, X, 2), _, <2,2,undef>
2348 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2349 unsigned NumMaskElts =
2350 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2351 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2352 for (unsigned i = 0; i != NumMaskElts; ++i)
2353 if (Mask[i] == PoisonMaskElem)
2354 NewMask[i] = Mask[i];
2355
2356 return new ShuffleVectorInst(NewIns, NewMask);
2357}
2358
2359/// Try to fold shuffles that are the equivalent of a vector select.
2361 if (!Shuf.isSelect())
2362 return nullptr;
2363
2364 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2365 // Commuting undef to operand 0 conflicts with another canonicalization.
2366 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2367 if (!match(Shuf.getOperand(1), m_Undef()) &&
2368 Shuf.getMaskValue(0) >= (int)NumElts) {
2369 // TODO: Can we assert that both operands of a shuffle-select are not undef
2370 // (otherwise, it would have been folded by instsimplify?
2371 Shuf.commute();
2372 return &Shuf;
2373 }
2374
2376 return I;
2377
2379 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2380 return I;
2381
// Both operands of the select shuffle must be binops to merge them into one.
2382 BinaryOperator *B0, *B1;
2383 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2384 !match(Shuf.getOperand(1), m_BinOp(B1)))
2385 return nullptr;
2386
2387 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2388 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2389 // with a multiply, we will exit because C0/C1 will not be set.
2390 Value *X, *Y;
2391 Constant *C0 = nullptr, *C1 = nullptr;
2392 bool ConstantsAreOp1;
2393 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2394 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2395 ConstantsAreOp1 = false;
2396 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2397 m_Neg(m_Value(X)))) &&
2399 m_Neg(m_Value(Y)))))
2400 ConstantsAreOp1 = true;
2401 else
2402 return nullptr;
2403
2404 // We need matching binops to fold the lanes together.
// If the opcodes differ (and constants are operand 1), try to rewrite one
// side into the other's opcode via getAlternateBinop (e.g. shl -> mul).
2407 bool DropNSW = false;
2408 if (ConstantsAreOp1 && Opc0 != Opc1) {
2409 // TODO: We drop "nsw" if shift is converted into multiply because it may
2410 // not be correct when the shift amount is BitWidth - 1. We could examine
2411 // each vector element to determine if it is safe to keep that flag.
2412 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2413 DropNSW = true;
2414 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2415 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2416 Opc0 = AltB0.Opcode;
2417 C0 = cast<Constant>(AltB0.Op1);
2418 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2419 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2420 Opc1 = AltB1.Opcode;
2421 C1 = cast<Constant>(AltB1.Op1);
2422 }
2423 }
2424
2425 if (Opc0 != Opc1 || !C0 || !C1)
2426 return nullptr;
2427
2428 // The opcodes must be the same. Use a new name to make that clear.
2429 BinaryOperator::BinaryOps BOpc = Opc0;
2430
2431 // Select the constant elements needed for the single binop.
2432 ArrayRef<int> Mask = Shuf.getShuffleMask();
2433 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2434
2435 // We are moving a binop after a shuffle. When a shuffle has an undefined
2436 // mask element, the result is undefined, but it is not poison or undefined
2437 // behavior. That is not necessarily true for div/rem/shift.
2438 bool MightCreatePoisonOrUB =
2441 if (MightCreatePoisonOrUB)
2443 ConstantsAreOp1);
2444
2445 Value *V;
2446 if (X == Y) {
2447 // Remove a binop and the shuffle by rearranging the constant:
2448 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2449 // shuffle (op C0, V), (op C1, V), M --> op C', V
2450 V = X;
2451 } else {
2452 // If there are 2 different variable operands, we must create a new shuffle
2453 // (select) first, so check uses to ensure that we don't end up with more
2454 // instructions than we started with.
2455 if (!B0->hasOneUse() && !B1->hasOneUse())
2456 return nullptr;
2457
2458 // If we use the original shuffle mask and op1 is *variable*, we would be
2459 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2460 // poison. We do not have to guard against UB when *constants* are op1
2461 // because safe constants guarantee that we do not overflow sdiv/srem (and
2462 // there's no danger for other opcodes).
2463 // TODO: To allow this case, create a new shuffle mask with no undefs.
2464 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2465 return nullptr;
2466
2467 // Note: In general, we do not create new shuffles in InstCombine because we
2468 // do not know if a target can lower an arbitrary shuffle optimally. In this
2469 // case, the shuffle uses the existing mask, so there is no additional risk.
2470
2471 // Select the variable vectors first, then perform the binop:
2472 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2473 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2474 V = Builder.CreateShuffleVector(X, Y, Mask);
2475 }
2476
2477 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2478 Builder.CreateBinOp(BOpc, NewC, V);
2479
2480 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2481 // 1. If we changed an opcode, poison conditions might have changed.
2482 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2483 // where the original code did not. But if we already made a safe constant,
2484 // then there's no danger.
2485 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2486 NewI->copyIRFlags(B0);
2487 NewI->andIRFlags(B1);
2488 if (DropNSW)
2489 NewI->setHasNoSignedWrap(false);
2490 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2491 NewI->dropPoisonGeneratingFlags();
2492 }
2493 return replaceInstUsesWith(Shuf, NewBO);
2494}
2495
2496/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2497/// Example (little endian):
2498/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
// NOTE(review): the function signature line (original line 2499,
// `static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,`) is
// absent from this listing — confirm against upstream LLVM.
2500                                         bool IsBigEndian) {
2501  // This must be a bitcasted shuffle of 1 vector integer operand.
2502  Type *DestType = Shuf.getType();
2503  Value *X;
2504  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2505      !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2506    return nullptr;
2507
2508  // The source type must have the same number of elements as the shuffle,
2509  // and the source element type must be larger than the shuffle element type.
2510  Type *SrcType = X->getType();
2511  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2512      cast<FixedVectorType>(SrcType)->getNumElements() !=
2513          cast<FixedVectorType>(DestType)->getNumElements() ||
2514      SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2515    return nullptr;
2516
2517  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2518         "Expected a shuffle that decreases length");
2519
2520  // Last, check that the mask chooses the correct low bits for each narrow
2521  // element in the result.
2522  uint64_t TruncRatio =
2523      SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2524  ArrayRef<int> Mask = Shuf.getShuffleMask();
2525  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2526    if (Mask[i] == PoisonMaskElem)
2527      continue;
      // On big-endian targets the low-order byte group of a wide element is
      // the *last* narrow element of that group, hence (i + 1) * Ratio - 1.
2528    uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2529    assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2530    if (Mask[i] != (int)LSBIndex)
2531      return nullptr;
2532  }
2533
2534  return new TruncInst(X, DestType);
2535}
2536
2537/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2538/// narrowing (concatenating with poison and extracting back to the original
2539/// length). This allows replacing the wide select with a narrow select.
// NOTE(review): the function signature line (original line 2540) and the
// m_Select matcher line (original line 2551) are absent from this listing —
// confirm against upstream LLVM.
2541                                       InstCombiner::BuilderTy &Builder) {
2542  // This must be a narrowing identity shuffle. It extracts the 1st N elements
2543  // of the 1st vector operand of a shuffle.
2544  if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2545    return nullptr;
2546
2547  // The vector being shuffled must be a vector select that we can eliminate.
2548  // TODO: The one-use requirement could be eased if X and/or Y are constants.
2549  Value *Cond, *X, *Y;
2550  if (!match(Shuf.getOperand(0),
2552    return nullptr;
2553
2554  // We need a narrow condition value. It must be extended with poison elements
2555  // and have the same number of elements as this shuffle.
2556  unsigned NarrowNumElts =
2557      cast<FixedVectorType>(Shuf.getType())->getNumElements();
2558  Value *NarrowCond;
2559  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2560      cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2561          NarrowNumElts ||
2562      !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2563    return nullptr;
2564
2565  // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2566  // -->
2567  // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2568  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2569  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2570  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2571}
2572
2573/// Canonicalize FP negate/abs after shuffle.
// NOTE(review): the function signature line (original line 2574) and the
// fabs-declaration line (original line 2597) are absent from this listing —
// confirm against upstream LLVM.
2575                                          InstCombiner::BuilderTy &Builder) {
2576  auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2577  Value *X;
2578  if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2579    return nullptr;
2580
2581  bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2582
2583  // Match 2-input (binary) shuffle.
  // Both operands must be the same unary op (both fneg or both fabs), and at
  // least one of them must die, so the transform does not add instructions.
2584  auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2585  Value *Y;
2586  if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2587      S0->getOpcode() != S1->getOpcode() ||
2588      (!S0->hasOneUse() && !S1->hasOneUse()))
2589    return nullptr;
2590
2591  // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2592  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2593  Instruction *NewF;
2594  if (IsFNeg) {
2595    NewF = UnaryOperator::CreateFNeg(NewShuf);
2596  } else {
2598        Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2599    NewF = CallInst::Create(FAbs, {NewShuf});
2600  }
  // FMF must hold for both original ops, so intersect the flag sets.
2601  NewF->copyIRFlags(S0);
2602  NewF->andIRFlags(S1);
2603  return NewF;
2604}
2605
2606/// Canonicalize casts after shuffle.
// NOTE(review): the function signature line (original line 2607,
// `static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,`) is absent
// from this listing — confirm against upstream LLVM.
2608                                    InstCombiner::BuilderTy &Builder) {
2609  auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2610  if (!Cast0)
2611    return nullptr;
2612
2613  // TODO: Allow other opcodes? That would require easing the type restrictions
2614  //       below here.
2615  CastInst::CastOps CastOpcode = Cast0->getOpcode();
2616  switch (CastOpcode) {
2617  case Instruction::SExt:
2618  case Instruction::ZExt:
2619  case Instruction::FPToSI:
2620  case Instruction::FPToUI:
2621  case Instruction::SIToFP:
2622  case Instruction::UIToFP:
2623    break;
2624  default:
2625    return nullptr;
2626  }
2627
2628  VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2629  VectorType *ShufTy = Shuf.getType();
2630  VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2631
2632  // TODO: Allow length-increasing shuffles?
2633  if (ShufTy->getElementCount().getKnownMinValue() >
2634      ShufOpTy->getElementCount().getKnownMinValue())
2635    return nullptr;
2636
2637  // shuffle (cast X), Poison, identity-with-extract-mask -->
2638  // cast (shuffle X, Poison, identity-with-extract-mask).
  // Unary case: hoisting the extract above the cast shrinks the cast.
2639  if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() &&
2640      Shuf.isIdentityWithExtract()) {
2641    auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2642                                               PoisonValue::get(CastSrcTy),
2643                                               Shuf.getShuffleMask());
2644    return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2645  }
2646
2647  auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2648  // Do we have 2 matching cast operands?
2649  if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2650      Cast0->getSrcTy() != Cast1->getSrcTy())
2651    return nullptr;
2652
2653  // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2654  assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2655         "Expected fixed vector operands for casts and binary shuffle");
2656  if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2657    return nullptr;
2658
2659  // At least one of the operands must have only one use (the shuffle).
2660  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2661    return nullptr;
2662
2663  // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2664  Value *X = Cast0->getOperand(0);
2665  Value *Y = Cast1->getOperand(0);
2666  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2667  return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2668}
2669
2670/// Try to fold an extract subvector operation.
// NOTE(review): the function signature line (original line 2671) and the
// size-comparison line inside the first fold (original line 2681) are
// absent from this listing — confirm against upstream LLVM.
2672  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2673  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2674    return nullptr;
2675
2676  // Check if we are extracting all bits of an inserted scalar:
2677  // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2678  Value *X;
2679  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2680      X->getType()->getPrimitiveSizeInBits() ==
2682    return new BitCastInst(X, Shuf.getType());
2683
2684  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2685  Value *Y;
2686  ArrayRef<int> Mask;
2687  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2688    return nullptr;
2689
2690  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2691  // then combining may result in worse codegen.
2692  if (!Op0->hasOneUse())
2693    return nullptr;
2694
2695  // We are extracting a subvector from a shuffle. Remove excess elements from
2696  // the 1st shuffle mask to eliminate the extract.
2697  //
2698  // This transform is conservatively limited to identity extracts because we do
2699  // not allow arbitrary shuffle mask creation as a target-independent transform
2700  // (because we can't guarantee that will lower efficiently).
2701  //
2702  // If the extracting shuffle has an poison mask element, it transfers to the
2703  // new shuffle mask. Otherwise, copy the original mask element. Example:
2704  // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2705  // shuf X, Y, <C0, poison, C2, poison>
2706  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2707  SmallVector<int, 16> NewMask(NumElts);
2708  assert(NumElts < Mask.size() &&
2709         "Identity with extract must have less elements than its inputs");
2710
  // Identity-with-extract means lane i of the result is lane i of Op0, so
  // the new mask is simply the first NumElts entries of the inner mask,
  // with poison lanes of the outer mask propagated.
2711  for (unsigned i = 0; i != NumElts; ++i) {
2712    int ExtractMaskElt = Shuf.getMaskValue(i);
2713    int MaskElt = Mask[i];
2714    NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2715  }
2716  return new ShuffleVectorInst(X, Y, NewMask);
2717}
2718
2719/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2720/// operand with the operand of an insertelement.
// NOTE(review): the function signature line (original line 2721), the local
// mask declaration (original line 2724), and the mask-commute call after
// std::swap (original line 2802) are absent from this listing — confirm
// against upstream LLVM.
2722                                            InstCombinerImpl &IC) {
2723  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2725  Shuf.getShuffleMask(Mask);
2726
2727  int NumElts = Mask.size();
2728  int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2729
2730  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2731  // not be able to handle it there if the insertelement has >1 use.
2732  // If the shuffle has an insertelement operand but does not choose the
2733  // inserted scalar element from that value, then we can replace that shuffle
2734  // operand with the source vector of the insertelement.
2735  Value *X;
2736  uint64_t IdxC;
2737  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2738    // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2739    if (!is_contained(Mask, (int)IdxC))
2740      return IC.replaceOperand(Shuf, 0, X);
2741  }
2742  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2743    // Offset the index constant by the vector width because we are checking for
2744    // accesses to the 2nd vector input of the shuffle.
2745    IdxC += InpNumElts;
2746    // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2747    if (!is_contained(Mask, (int)IdxC))
2748      return IC.replaceOperand(Shuf, 1, X);
2749  }
2750  // For the rest of the transform, the shuffle must not change vector sizes.
2751  // TODO: This restriction could be removed if the insert has only one use
2752  // (because the transform would require a new length-changing shuffle).
2753  if (NumElts != InpNumElts)
2754    return nullptr;
2755
2756  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
  // Helper: returns true (and sets Scalar/IndexC) when the shuffle acts as an
  // insert of V0's inserted scalar into exactly one lane of V1, with all
  // other non-poison lanes taken from V1 unchanged.
2757  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2758    // We need an insertelement with a constant index.
2759    if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2760                               m_ConstantInt(IndexC))))
2761      return false;
2762
2763    // Test the shuffle mask to see if it splices the inserted scalar into the
2764    // operand 1 vector of the shuffle.
2765    int NewInsIndex = -1;
2766    for (int i = 0; i != NumElts; ++i) {
2767      // Ignore undef mask elements.
2768      if (Mask[i] == -1)
2769        continue;
2770
2771      // The shuffle takes elements of operand 1 without lane changes.
2772      if (Mask[i] == NumElts + i)
2773        continue;
2774
2775      // The shuffle must choose the inserted scalar exactly once.
2776      if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2777        return false;
2778
2779      // The shuffle is placing the inserted scalar into element i.
2780      NewInsIndex = i;
2781    }
2782
2783    assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2784
2785    // Index is updated to the potentially translated insertion lane.
2786    IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2787    return true;
2788  };
2789
2790  // If the shuffle is unnecessary, insert the scalar operand directly into
2791  // operand 1 of the shuffle. Example:
2792  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2793  Value *Scalar;
2794  ConstantInt *IndexC;
2795  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2796    return InsertElementInst::Create(V1, Scalar, IndexC);
2797
2798  // Try again after commuting shuffle. Example:
2799  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2800  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2801  std::swap(V0, V1);
2803  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2804    return InsertElementInst::Create(V1, Scalar, IndexC);
2805
2806  return nullptr;
2807}
2808
// NOTE(review): the function signature line (original line 2809) and one
// isPowerOf2_32 continuation line (original line 2828) are absent from this
// listing — confirm against upstream LLVM.
2810  // Match the operands as identity with padding (also known as concatenation
2811  // with undef) shuffles of the same source type. The backend is expected to
2812  // recreate these concatenations from a shuffle of narrow operands.
2813  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2814  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2815  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2816      !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2817    return nullptr;
2818
2819  // We limit this transform to power-of-2 types because we expect that the
2820  // backend can convert the simplified IR patterns to identical nodes as the
2821  // original IR.
2822  // TODO: If we can verify the same behavior for arbitrary types, the
2823  //       power-of-2 checks can be removed.
2824  Value *X = Shuffle0->getOperand(0);
2825  Value *Y = Shuffle1->getOperand(0);
2826  if (X->getType() != Y->getType() ||
2827      !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2829          cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2830      !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2831      match(X, m_Undef()) || match(Y, m_Undef()))
2832    return nullptr;
2833  assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2834         match(Shuffle1->getOperand(1), m_Undef()) &&
2835         "Unexpected operand for identity shuffle");
2836
2837  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2838  // operands directly by adjusting the shuffle mask to account for the narrower
2839  // types:
2840  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2841  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2842  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2843  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2844
2845  ArrayRef<int> Mask = Shuf.getShuffleMask();
2846  SmallVector<int, 16> NewMask(Mask.size(), -1);
2847  for (int i = 0, e = Mask.size(); i != e; ++i) {
2848    if (Mask[i] == -1)
2849      continue;
2850
2851    // If this shuffle is choosing an undef element from 1 of the sources, that
2852    // element is undef.
2853    if (Mask[i] < WideElts) {
2854      if (Shuffle0->getMaskValue(Mask[i]) == -1)
2855        continue;
2856    } else {
2857      if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2858        continue;
2859    }
2860
2861    // If this shuffle is choosing from the 1st narrow op, the mask element is
2862    // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2863    // element is offset down to adjust for the narrow vector widths.
2864    if (Mask[i] < WideElts) {
2865      assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2866      NewMask[i] = Mask[i];
2867    } else {
2868      assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2869      NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2870    }
2871  }
2872  return new ShuffleVectorInst(X, Y, NewMask);
2873}
2874
2875// Splatting the first element of the result of a BinOp, where any of the
2876// BinOp's operands are the result of a first element splat can be simplified to
2877// splatting the first element of the result of the BinOp
// NOTE(review): the function signature line (original line 2878), the two
// splat-matcher lines of the binop operands (original lines 2886, 2889), and
// the speculation-safety check (original line 2895) are absent from this
// listing — confirm against upstream LLVM.
2879  if (!match(SVI.getOperand(1), m_Poison()) ||
2880      !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2881      !SVI.getOperand(0)->hasOneUse())
2882    return nullptr;
2883
2884  Value *Op0 = SVI.getOperand(0);
2885  Value *X, *Y;
2887                           m_Value(Y))) &&
2888      !match(Op0, m_BinOp(m_Value(X),
2890    return nullptr;
2891  if (X->getType() != Y->getType())
2892    return nullptr;
2893
2894  auto *BinOp = cast<BinaryOperator>(Op0);
2896    return nullptr;
2897
  // Rebuild the binop on the un-splatted operands, then splat its result;
  // IR flags are copied from the original binop.
2898  Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2899  if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2900    NewBOI->copyIRFlags(BinOp);
2901
2902  return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2903}
2904
// NOTE(review): this listing is missing several original lines: the function
// signature (2905) and interior lines 2981, 2990, 2993, 3007, 3014, 3035,
// 3077-3078, 3103, 3174-3175 (mostly fold-helper calls and local
// declarations whose identifiers were hyperlinked). Confirm against
// upstream LLVM before relying on the exact sequence of folds.
2906  Value *LHS = SVI.getOperand(0);
2907  Value *RHS = SVI.getOperand(1);
2908  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2909  if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2910                                          SVI.getType(), ShufQuery))
2911    return replaceInstUsesWith(SVI, V);
2912
2913  if (Instruction *I = simplifyBinOpSplats(SVI))
2914    return I;
2915
2916  // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2917  // order to support scalable vectors.
2918  if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2919    return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2920
  // Everything below requires a fixed element count; bail for scalable
  // vectors.
2921  if (isa<ScalableVectorType>(LHS->getType()))
2922    return nullptr;
2923
2924  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2925  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2926
2927  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2928  //
2929  // if X and Y are of the same (vector) type, and the element size is not
2930  // changed by the bitcasts, we can distribute the bitcasts through the
2931  // shuffle, hopefully reducing the number of instructions. We make sure that
2932  // at least one bitcast only has one use, so we don't *increase* the number of
2933  // instructions here.
2934  Value *X, *Y;
2935  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2936      X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2937      X->getType()->getScalarSizeInBits() ==
2938          SVI.getType()->getScalarSizeInBits() &&
2939      (LHS->hasOneUse() || RHS->hasOneUse())) {
2940    Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2941                                           SVI.getName() + ".uncasted");
2942    return new BitCastInst(V, SVI.getType());
2943  }
2944
2945  ArrayRef<int> Mask = SVI.getShuffleMask();
2946
2947  // Peek through a bitcasted shuffle operand by scaling the mask. If the
2948  // simulated shuffle can simplify, then this shuffle is unnecessary:
2949  // shuf (bitcast X), undef, Mask --> bitcast X'
2950  // TODO: This could be extended to allow length-changing shuffles.
2951  //       The transform might also be obsoleted if we allowed canonicalization
2952  //       of bitcasted shuffles.
2953  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2954      X->getType()->isVectorTy() && VWidth == LHSWidth) {
2955    // Try to create a scaled mask constant.
2956    auto *XType = cast<FixedVectorType>(X->getType());
2957    unsigned XNumElts = XType->getNumElements();
2958    SmallVector<int, 16> ScaledMask;
2959    if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2960      // If the shuffled source vector simplifies, cast that value to this
2961      // shuffle's type.
2962      if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2963                                              ScaledMask, XType, ShufQuery))
2964        return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2965    }
2966  }
2967
2968  // shuffle x, x, mask --> shuffle x, undef, mask'
2969  if (LHS == RHS) {
2970    assert(!match(RHS, m_Undef()) &&
2971           "Shuffle with 2 undef ops not simplified?");
2972    return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2973  }
2974
2975  // shuffle undef, x, mask --> shuffle x, undef, mask'
2976  if (match(LHS, m_Undef())) {
2977    SVI.commute();
2978    return &SVI;
2979  }
2980
  // NOTE(review): the call line producing this `I` (original line 2981) is
  // missing from the listing.
2982    return I;
2983
2984  if (Instruction *I = foldSelectShuffle(SVI))
2985    return I;
2986
2987  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2988    return I;
2989
  // NOTE(review): missing call line (original 2990).
2991    return I;
2992
  // NOTE(review): missing call line (original 2993).
2994    return I;
2995
2996  if (Instruction *I = foldCastShuffle(SVI, Builder))
2997    return I;
2998
2999  APInt PoisonElts(VWidth, 0);
3000  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
3001  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
3002    if (V != &SVI)
3003      return replaceInstUsesWith(SVI, V);
3004    return &SVI;
3005  }
3006
  // NOTE(review): missing call line (original 3007).
3008    return I;
3009
3010  // These transforms have the potential to lose undef knowledge, so they are
3011  // intentionally placed after SimplifyDemandedVectorElts().
3012  if (Instruction *I = foldShuffleWithInsert(SVI, *this))
3013    return I;
  // NOTE(review): missing call line (original 3014).
3015    return I;
3016
3017  if (match(RHS, m_Constant())) {
3018    if (auto *SI = dyn_cast<SelectInst>(LHS)) {
3019      // We cannot do this fold for elementwise select since ShuffleVector is
3020      // not elementwise.
3021      if (SI->getCondition()->getType()->isIntegerTy() &&
3022          (isa<PoisonValue>(RHS) ||
3023           isGuaranteedNotToBePoison(SI->getCondition()))) {
3024        if (Instruction *I = FoldOpIntoSelect(SVI, SI))
3025          return I;
3026      }
3027    }
3028    if (auto *PN = dyn_cast<PHINode>(LHS)) {
3029      if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
3030        return I;
3031    }
3032  }
3033
3034  if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
    // NOTE(review): missing line (original 3035) — presumably the call that
    // produces `V` (evaluateInDifferentElementOrder per the file's index).
3036      return replaceInstUsesWith(SVI, V);
3037  }
3038
3039  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3040  // a non-vector type. We can instead bitcast the original vector followed by
3041  // an extract of the desired element:
3042  //
3043  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3044  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3045  //   %1 = bitcast <4 x i8> %sroa to i32
3046  // Becomes:
3047  //   %bc = bitcast <16 x i8> %in to <4 x i32>
3048  //   %ext = extractelement <4 x i32> %bc, i32 0
3049  //
3050  // If the shuffle is extracting a contiguous range of values from the input
3051  // vector then each use which is a bitcast of the extracted size can be
3052  // replaced. This will work if the vector types are compatible, and the begin
3053  // index is aligned to a value in the casted vector type. If the begin index
3054  // isn't aligned then we can shuffle the original vector (keeping the same
3055  // vector type) before extracting.
3056  //
3057  // This code will bail out if the target type is fundamentally incompatible
3058  // with vectors of the source type.
3059  //
3060  // Example of <16 x i8>, target type i32:
3061  // Index range [4,8):         v-----------v Will work.
3062  //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3063  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
3064  //     <4 x i32>: |           |           |           |           |
3065  //                +-----------+-----------+-----------+-----------+
3066  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
3067  // Target type i40:           ^--------------^ Won't work, bail.
3068  bool MadeChange = false;
3069  if (isShuffleExtractingFromLHS(SVI, Mask)) {
3070    Value *V = LHS;
3071    unsigned MaskElems = Mask.size();
3072    auto *SrcTy = cast<FixedVectorType>(V->getType());
3073    unsigned VecBitWidth = DL.getTypeSizeInBits(SrcTy);
3074    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3075    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3076    unsigned SrcNumElems = SrcTy->getNumElements();
    // NOTE(review): missing declarations (original lines 3077-3078) for the
    // `BCs` worklist and the `NewBCs` type-to-bitcast cache used below.
3079    for (User *U : SVI.users())
3080      if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
3081        // Only visit bitcasts that weren't previously handled.
3082        if (BC->use_empty())
3083          continue;
3084        // Prefer to combine bitcasts of bitcasts before attempting this fold.
3085        if (BC->hasOneUse()) {
3086          auto *BC2 = dyn_cast<BitCastInst>(BC->user_back());
3087          if (BC2 && isEliminableCastPair(BC, BC2))
3088            continue;
3089        }
3090        BCs.push_back(BC);
3091      }
3092    for (BitCastInst *BC : BCs) {
3093      unsigned BegIdx = Mask.front();
3094      Type *TgtTy = BC->getDestTy();
3095      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3096      if (!TgtElemBitWidth)
3097        continue;
3098      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3099      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3100      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3101      if (!VecBitWidthsEqual)
3102        continue;
      // NOTE(review): missing condition line (original 3103) guarding this
      // `continue` — likely a target-element-type validity check.
3104        continue;
3105      auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3106      if (!BegIsAligned) {
3107        // Shuffle the input so [0,NumElements) contains the output, and
3108        // [NumElems,SrcNumElems) is undef.
3109        SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3110        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3111          ShuffleMask[I] = Idx;
3112        V = Builder.CreateShuffleVector(V, ShuffleMask,
3113                                        SVI.getName() + ".extract");
3114        BegIdx = 0;
3115      }
3116      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3117      assert(SrcElemsPerTgtElem);
3118      BegIdx /= SrcElemsPerTgtElem;
3119      auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
3120      if (Inserted)
3121        It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3122      auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,
3123                                               SVI.getName() + ".extract");
3124      // The shufflevector isn't being replaced: the bitcast that used it
3125      // is. InstCombine will visit the newly-created instructions.
3126      replaceInstUsesWith(*BC, Ext);
3127      MadeChange = true;
3128    }
3129  }
3130
3131  // If the LHS is a shufflevector itself, see if we can combine it with this
3132  // one without producing an unusual shuffle.
3133  // Cases that might be simplified:
3134  // 1.
3135  // x1=shuffle(v1,v2,mask1)
3136  // x=shuffle(x1,undef,mask)
3137  // ==>
3138  // x=shuffle(v1,undef,newMask)
3139  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3140  // 2.
3141  // x1=shuffle(v1,undef,mask1)
3142  // x=shuffle(x1,x2,mask)
3143  // where v1.size() == mask1.size()
3144  // ==>
3145  // x=shuffle(v1,x2,newMask)
3146  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3147  // 3.
3148  // x2=shuffle(v2,undef,mask2)
3149  // x=shuffle(x1,x2,mask)
3150  // where v2.size() == mask2.size()
3151  // ==>
3152  // x=shuffle(x1,v2,newMask)
3153  // newMask[i] = (mask[i] < x1.size())
3154  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3155  // 4.
3156  // x1=shuffle(v1,undef,mask1)
3157  // x2=shuffle(v2,undef,mask2)
3158  // x=shuffle(x1,x2,mask)
3159  // where v1.size() == v2.size()
3160  // ==>
3161  // x=shuffle(v1,v2,newMask)
3162  // newMask[i] = (mask[i] < x1.size())
3163  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3164  //
3165  // Here we are really conservative:
3166  // we are absolutely afraid of producing a shuffle mask not in the input
3167  // program, because the code gen may not be smart enough to turn a merged
3168  // shuffle into two specific shuffles: it may produce worse code. As such,
3169  // we only merge two shuffles if the result is either a splat or one of the
3170  // input shuffle masks. In this case, merging the shuffles just removes
3171  // one instruction, which we know is safe. This is good for things like
3172  // turning: (splat(splat)) -> splat, or
3173  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
  // NOTE(review): missing declarations (original lines 3174-3175) of the
  // `LHSShuffle`/`RHSShuffle` dyn_casts of LHS/RHS used below.
3176  if (LHSShuffle)
3177    if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3178        !match(RHS, m_Poison()))
3179      LHSShuffle = nullptr;
3180  if (RHSShuffle)
3181    if (!match(RHSShuffle->getOperand(1), m_Poison()))
3182      RHSShuffle = nullptr;
3183  if (!LHSShuffle && !RHSShuffle)
3184    return MadeChange ? &SVI : nullptr;
3185
3186  Value* LHSOp0 = nullptr;
3187  Value* LHSOp1 = nullptr;
3188  Value* RHSOp0 = nullptr;
3189  unsigned LHSOp0Width = 0;
3190  unsigned RHSOp0Width = 0;
3191  if (LHSShuffle) {
3192    LHSOp0 = LHSShuffle->getOperand(0);
3193    LHSOp1 = LHSShuffle->getOperand(1);
3194    LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3195  }
3196  if (RHSShuffle) {
3197    RHSOp0 = RHSShuffle->getOperand(0);
3198    RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3199  }
3200  Value* newLHS = LHS;
3201  Value* newRHS = RHS;
3202  if (LHSShuffle) {
3203    // case 1
3204    if (match(RHS, m_Poison())) {
3205      newLHS = LHSOp0;
3206      newRHS = LHSOp1;
3207    }
3208    // case 2 or 4
3209    else if (LHSOp0Width == LHSWidth) {
3210      newLHS = LHSOp0;
3211    }
3212  }
3213  // case 3 or 4
3214  if (RHSShuffle && RHSOp0Width == LHSWidth) {
3215    newRHS = RHSOp0;
3216  }
3217  // case 4
3218  if (LHSOp0 == RHSOp0) {
3219    newLHS = LHSOp0;
3220    newRHS = nullptr;
3221  }
3222
3223  if (newLHS == LHS && newRHS == RHS)
3224    return MadeChange ? &SVI : nullptr;
3225
3226  ArrayRef<int> LHSMask;
3227  ArrayRef<int> RHSMask;
3228  if (newLHS != LHS)
3229    LHSMask = LHSShuffle->getShuffleMask();
3230  if (RHSShuffle && newRHS != RHS)
3231    RHSMask = RHSShuffle->getShuffleMask();
3232
3233  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3234  SmallVector<int, 16> newMask;
3235  bool isSplat = true;
3236  int SplatElt = -1;
3237  // Create a new mask for the new ShuffleVectorInst so that the new
3238  // ShuffleVectorInst is equivalent to the original one.
3239  for (unsigned i = 0; i < VWidth; ++i) {
3240    int eltMask;
3241    if (Mask[i] < 0) {
3242      // This element is a poison value.
3243      eltMask = -1;
3244    } else if (Mask[i] < (int)LHSWidth) {
3245      // This element is from left hand side vector operand.
3246      //
3247      // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3248      // new mask value for the element.
3249      if (newLHS != LHS) {
3250        eltMask = LHSMask[Mask[i]];
3251        // If the value selected is an poison value, explicitly specify it
3252        // with a -1 mask value.
3253        if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3254          eltMask = -1;
3255      } else
3256        eltMask = Mask[i];
3257    } else {
3258      // This element is from right hand side vector operand
3259      //
3260      // If the value selected is a poison value, explicitly specify it
3261      // with a -1 mask value. (case 1)
3262      if (match(RHS, m_Poison()))
3263        eltMask = -1;
3264      // If RHS is going to be replaced (case 3 or 4), calculate the
3265      // new mask value for the element.
3266      else if (newRHS != RHS) {
3267        eltMask = RHSMask[Mask[i]-LHSWidth];
3268        // If the value selected is an poison value, explicitly specify it
3269        // with a -1 mask value.
3270        if (eltMask >= (int)RHSOp0Width) {
3271          assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3272                 "should have been check above");
3273          eltMask = -1;
3274        }
3275      } else
3276        eltMask = Mask[i]-LHSWidth;
3277
3278      // If LHS's width is changed, shift the mask value accordingly.
3279      // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3280      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3281      // If newRHS == newLHS, we want to remap any references from newRHS to
3282      // newLHS so that we can properly identify splats that may occur due to
3283      // obfuscation across the two vectors.
3284      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3285        eltMask += newLHSWidth;
3286    }
3287
3288    // Check if this could still be a splat.
3289    if (eltMask >= 0) {
3290      if (SplatElt >= 0 && SplatElt != eltMask)
3291        isSplat = false;
3292      SplatElt = eltMask;
3293    }
3294
3295    newMask.push_back(eltMask);
3296  }
3297
3298  // If the result mask is equal to one of the original shuffle masks,
3299  // or is a splat, do the replacement.
3300  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3301    if (!newRHS)
3302      newRHS = PoisonValue::get(newLHS->getType());
3303    return new ShuffleVectorInst(newLHS, newRHS, newMask);
3304  }
3305
3306  return MadeChange ? &SVI : nullptr;
3307}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
Hexagon Common GEP
This file provides internal interfaces used to implement the InstCombine.
static Instruction * foldConstantInsEltIntoShuffle(InsertElementInst &InsElt)
insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex --> shufflevector X,...
static Value * evaluateInDifferentElementOrder(Value *V, ArrayRef< int > Mask, IRBuilderBase &Builder)
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl< int > &Mask)
If V is a shuffle of values that ONLY returns elements from either LHS or RHS, return the shuffle mas...
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl< int > &Mask, Value *PermittedRHS, InstCombinerImpl &IC, bool &Rerun)
static APInt findDemandedEltsByAllUsers(Value *V)
Find union of elements of V demanded by all its users.
static Instruction * foldTruncInsEltPair(InsertElementInst &InsElt, bool IsBigEndian, InstCombiner::BuilderTy &Builder)
If we are inserting 2 halves of a value into adjacent elements of a vector, try to convert to a singl...
static Instruction * foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf, const SimplifyQuery &SQ)
static Instruction * foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf)
static Instruction * foldIdentityExtractShuffle(ShuffleVectorInst &Shuf)
Try to fold an extract subvector operation.
static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr, APInt &UnionUsedElts)
Find elements of V demanded by UserInstr.
static Instruction * foldInsEltIntoSplat(InsertElementInst &InsElt)
Try to fold an insert element into an existing splat shuffle by changing the shuffle's mask to includ...
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
static Instruction * foldShuffleWithInsert(ShuffleVectorInst &Shuf, InstCombinerImpl &IC)
Try to replace a shuffle with an insertelement or try to replace a shuffle operand with the operand o...
static Instruction * canonicalizeInsertSplat(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
If we have an insert of a scalar to a non-zero element of an undefined vector and then shuffle that v...
static Instruction * foldTruncShuffle(ShuffleVectorInst &Shuf, bool IsBigEndian)
Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
static bool replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombinerImpl &IC)
If we have insertion into a vector that is wider than the vector that we are extracting from,...
static bool cheapToScalarize(Value *V, Value *EI)
Return true if the value is cheaper to scalarize than it is to leave as a vector operation.
static Value * buildNew(Instruction *I, ArrayRef< Value * > NewOps, IRBuilderBase &Builder)
Rebuild a new instruction just like 'I' but with the new operands given.
static bool canEvaluateShuffled(Value *V, ArrayRef< int > Mask, unsigned Depth=5)
Return true if we can evaluate the specified expression tree if the vector elements were shuffled in ...
static Instruction * foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf)
A select shuffle of a select shuffle with a shared operand can be reduced to a single select shuffle.
static Instruction * hoistInsEltConst(InsertElementInst &InsElt2, InstCombiner::BuilderTy &Builder)
If we have an insertelement instruction feeding into another insertelement and the 2nd is inserting a...
static Instruction * foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Canonicalize FP negate/abs after shuffle.
static Instruction * foldCastShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Canonicalize casts after shuffle.
static Instruction * narrowInsElt(InsertElementInst &InsElt, InstCombiner::BuilderTy &Builder)
If both the base vector and the inserted element are extended from the same type, do the insert eleme...
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf)
static Instruction * foldInsSequenceIntoSplat(InsertElementInst &InsElt)
Turn a chain of inserts that splats a value into an insert + shuffle: insertelt(insertelt(insertelt(i...
static Instruction * foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt)
Try to fold an extract+insert element into an existing identity shuffle by changing the shuffle's mas...
static ConstantInt * getPreferredVectorIndex(ConstantInt *IndexC)
Given a constant index for a extractelement or insertelement instruction, return it with the canonica...
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, ArrayRef< int > Mask)
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL)
Binops may be transformed into binops with different opcodes and operands.
This file provides the interface for the instcombine pass implementation.
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget)
If both arms of a vector select are concatenated vectors, split the select, and concatenate the resul...
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
Get the first element.
Definition ArrayRef.h:144
size_t size() const
Get the array size.
Definition ArrayRef.h:141
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:219
This class represents a no-op cast from one type to another.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static LLVM_ABI CmpInst * CreateWithCopiedFlags(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Instruction *FlagsSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate, the two operands and the instructio...
OtherOps getOpcode() const
Get the opcode casted to the right type.
Definition InstrTypes.h:760
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
This instruction extracts a single (scalar) element from a VectorType value.
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
VectorType * getVectorOperandType() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI void setNoWrapFlags(GEPNoWrapFlags NW)
Set nowrap flags for GEP instruction.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
VectorType * getType() const
Overload to return most specific vector type.
This instruction inserts a struct field of array element value into an aggregate value.
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * foldSelectShuffle(ShuffleVectorInst &Shuf)
Try to fold shuffles that are the equivalent of a vector select.
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitInsertValueInst(InsertValueInst &IV)
Try to find redundant insertvalue instructions, like the following ones: %0 = insertvalue { i8,...
Instruction * visitInsertElementInst(InsertElementInst &IE)
Instruction * visitExtractElementInst(ExtractElementInst &EI)
Instruction * simplifyBinOpSplats(ShuffleVectorInst &SVI)
Instruction * foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI)
Look for chain of insertvalue's that fully define an aggregate, and trace back the values inserted,...
Instruction * visitShuffleVectorInst(ShuffleVectorInst &SVI)
SimplifyQuery SQ
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void addToWorklist(Instruction *I)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
BuilderTy & Builder
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
bool isIntDivRem() const
A wrapper class for inspecting calls to intrinsic functions.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Definition MapVector.h:118
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition Constants.h:1660
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
bool changesLength() const
Return true if this shuffle returns a vector with a different number of elements than its source vect...
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
VectorType * getType() const
Overload to return most specific vector type.
bool increasesLength() const
Return true if this shuffle returns a vector with a greater number of elements than its source vector...
LLVM_ABI bool isIdentityWithExtract() const
Return true if this shuffle extracts the first N elements of exactly one source vector.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
LLVM_ABI void commute()
Swap the operands and adjust the mask to preserve the semantics of the instruction.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool all() const
Returns true if all bits are set.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
LLVM_ABI unsigned getStructNumElements() const
LLVM_ABI uint64_t getArrayNumElements() const
@ ArrayTyID
Arrays.
Definition Type.h:76
@ StructTyID
Structures.
Definition Type.h:75
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
TypeID getTypeID() const
Return the type id for the type.
Definition Type.h:138
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:139
UnaryOps getOpcode() const
Definition InstrTypes.h:154
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition Value.cpp:1099
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
User * user_back()
Definition Value.h:412
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Type * getElementType() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOpc_match< LHS, RHS, false > m_BinOp(unsigned Opcode, const LHS &L, const RHS &R)
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
auto m_Cmp()
Matches any compare instruction and ignore it.
auto m_Poison()
Match an arbitrary poison constant.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_UnOp()
Match an arbitrary unary operation and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
LLVM_ABI Value * simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef< int > Mask, Type *RetTy, const SimplifyQuery &Q)
Given operands for a ShuffleVectorInst, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Value * simplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an InsertValueInst, fold the result or return null.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
LLVM_ABI Value * simplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q)
Given operands for an InsertElement, fold the result or return null.
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI Value * simplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q)
Given operands for an ExtractElementInst, fold the result or return null.
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
These are the ingredients in an alternate form binary operator as described below.
BinopElts(BinaryOperator::BinaryOps Opc=(BinaryOperator::BinaryOps) 0, Value *V0=nullptr, Value *V1=nullptr)
BinaryOperator::BinaryOps Opcode
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334