doxygen/VectorUtils_8cpp_source.html

//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file defines vectorizer utilities.

//

//===----------------------------------------------------------------------===//


#include "llvm/Analysis/VectorUtils.h"

#include "llvm/ADT/EquivalenceClasses.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/DemandedBits.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/LoopIterator.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/MemoryModelRelaxationAnnotations.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/Value.h"

#include "llvm/Support/CommandLine.h"


#define DEBUG_TYPE "vectorutils"


using namespace llvm;

using namespace llvm::PatternMatch;


/// Hidden command-line option `-max-interleave-group-factor`: the maximum
/// factor allowed for an interleaved memory access group. It is initialised
/// to 8 when the option is not given.

static cl::opt<unsigned> MaxInterleaveGroupFactor(

    "max-interleave-group-factor", cl::Hidden,

    cl::desc("Maximum factor for an interleaved access group (default = 8)"),

    cl::init(8));


/// Report whether \p ID names an intrinsic that can be vectorized trivially:
/// all of its arguments and its return type are scalars in the scalar form
/// and vectors in the vector form, with the exception of operands that
/// isVectorIntrinsicWithScalarOpAtArg marks as always being scalar.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  bool Trivial = false;
  switch (ID) {
  // Integer bit manipulation.
  case Intrinsic::abs:
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::clmul:
  // Integer min/max and three-way comparison.
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
  // Saturating and fixed-point integer arithmetic.
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  // Integer arithmetic with overflow reporting.
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
  // Trigonometric and hyperbolic functions.
  case Intrinsic::asin:
  case Intrinsic::acos:
  case Intrinsic::atan:
  case Intrinsic::atan2:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::tan:
  case Intrinsic::sinh:
  case Intrinsic::cosh:
  case Intrinsic::tanh:
  // Roots, exponentials, logarithms and powers.
  case Intrinsic::sqrt:
  case Intrinsic::exp:
  case Intrinsic::exp10:
  case Intrinsic::exp2:
  case Intrinsic::frexp:
  case Intrinsic::ldexp:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::pow:
  case Intrinsic::powi:
  // Floating-point sign, min/max and classification.
  case Intrinsic::fabs:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::minimumnum:
  case Intrinsic::maximumnum:
  case Intrinsic::modf:
  case Intrinsic::copysign:
  case Intrinsic::canonicalize:
  case Intrinsic::is_fpclass:
  // Floating-point rounding.
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  // Fused multiply-add.
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  // Floating-point to integer conversions.
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    Trivial = true;
    break;
  default:
    break;
  }
  return Trivial;
}


/// Report whether \p ID is trivially scalarizable: either it is trivially
/// vectorizable, or Intrinsic::isTriviallyScalarizable accepts it.
bool llvm::isTriviallyScalarizable(Intrinsic::ID ID) {
  return isTriviallyVectorizable(ID) || Intrinsic::isTriviallyScalarizable(ID);
}


/// Report whether operand \p ScalarOpdIdx of the vector form of intrinsic
/// \p ID is a scalar operand. When \p TTI is provided, target intrinsics are
/// delegated to it.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                              unsigned ScalarOpdIdx,
                                              const TargetTransformInfo *TTI) {
  // Target intrinsics are answered by the target.
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);

  // The EVL operand of a vector predication intrinsic is scalar.
  const bool IsEVLOperand =
      VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx;
  if (IsEVLOperand)
    return true;

  switch (ID) {
  // Intrinsics whose operand 1 is scalar.
  case Intrinsic::abs:
  case Intrinsic::vp_abs:
  case Intrinsic::ctlz:
  case Intrinsic::vp_ctlz:
  case Intrinsic::cttz:
  case Intrinsic::vp_cttz:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::vector_extract:
    return ScalarOpdIdx == 1;

  // Intrinsics whose operand 2 is scalar.
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::vector_splice_left:
  case Intrinsic::vector_splice_right:
    return ScalarOpdIdx == 2;

  case Intrinsic::experimental_vp_splice:
    return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;

  case Intrinsic::experimental_vp_strided_load:
    return ScalarOpdIdx == 0 || ScalarOpdIdx == 1;

  // Every operand is scalar.
  case Intrinsic::loop_dependence_war_mask:
    return true;

  default:
    return false;
  }
}


/// Report whether the vector form of intrinsic \p ID is overloaded on the
/// type at position \p OpdIdx. Position -1 is accepted for every non-target
/// intrinsic except the modf/sincos/sincospi/is_fpclass group (presumably it
/// denotes the return type -- confirm against the declaration in the header).
/// When \p TTI is provided, target intrinsics are delegated to it.
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
    Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI) {
  assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!");

  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);

  const bool IsResultSlot = OpdIdx == -1;
  const bool IsOperand0 = OpdIdx == 0;
  const bool IsOperand1 = OpdIdx == 1;

  // VP casts are overloaded on position -1 and on operand 0.
  if (VPCastIntrinsic::isVPCast(ID))
    return IsResultSlot || IsOperand0;

  switch (ID) {
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::vp_lrint:
  case Intrinsic::vp_llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
  case Intrinsic::vector_extract:
  case Intrinsic::loop_dependence_war_mask:
    return IsResultSlot || IsOperand0;

  case Intrinsic::modf:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
    return IsOperand0;

  case Intrinsic::powi:
  case Intrinsic::ldexp:
    return IsResultSlot || IsOperand1;

  case Intrinsic::experimental_vp_strided_load:
    return IsResultSlot || IsOperand0 || IsOperand1;

  default:
    return IsResultSlot;
  }
}


/// Report whether the struct-returning vector form of intrinsic \p ID is
/// overloaded on the struct field \p RetIdx. When \p TTI is provided, target
/// intrinsics are delegated to it.
bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(
    Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);

  // frexp is overloaded on both fields; everything else only on field 0.
  if (ID == Intrinsic::frexp)
    return RetIdx == 0 || RetIdx == 1;
  return RetIdx == 0;
}


/// Return the intrinsic ID that the call \p CI maps to, provided that
/// intrinsic is trivially vectorizable or is one of lifetime_start,
/// lifetime_end, assume, experimental_noalias_scope_decl, sideeffect or
/// pseudoprobe. Otherwise, or if no mapping intrinsic is found, return
/// Intrinsic::not_intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  const Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);

  switch (ID) {
  case Intrinsic::not_intrinsic:
    return Intrinsic::not_intrinsic;

  // Accepted even though they are not in the trivially vectorizable list.
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::assume:
  case Intrinsic::experimental_noalias_scope_decl:
  case Intrinsic::sideeffect:
  case Intrinsic::pseudoprobe:
    return ID;

  default:
    return isTriviallyVectorizable(ID) ? ID : Intrinsic::not_intrinsic;
  }
}


/// Return the interleave factor N when \p ID is Intrinsic::vector_interleaveN
/// (N in 2..8), and 0 for any other intrinsic.
unsigned llvm::getInterleaveIntrinsicFactor(Intrinsic::ID ID) {
  unsigned Factor = 0;
  switch (ID) {
  case Intrinsic::vector_interleave2:
    Factor = 2;
    break;
  case Intrinsic::vector_interleave3:
    Factor = 3;
    break;
  case Intrinsic::vector_interleave4:
    Factor = 4;
    break;
  case Intrinsic::vector_interleave5:
    Factor = 5;
    break;
  case Intrinsic::vector_interleave6:
    Factor = 6;
    break;
  case Intrinsic::vector_interleave7:
    Factor = 7;
    break;
  case Intrinsic::vector_interleave8:
    Factor = 8;
    break;
  default:
    break;
  }
  return Factor;
}


/// Return the deinterleave factor N when \p ID is
/// Intrinsic::vector_deinterleaveN (N in 2..8), and 0 for any other intrinsic.
unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {
  unsigned Factor = 0;
  switch (ID) {
  case Intrinsic::vector_deinterleave2:
    Factor = 2;
    break;
  case Intrinsic::vector_deinterleave3:
    Factor = 3;
    break;
  case Intrinsic::vector_deinterleave4:
    Factor = 4;
    break;
  case Intrinsic::vector_deinterleave5:
    Factor = 5;
    break;
  case Intrinsic::vector_deinterleave6:
    Factor = 6;
    break;
  case Intrinsic::vector_deinterleave7:
    Factor = 7;
    break;
  case Intrinsic::vector_deinterleave8:
    Factor = 8;
    break;
  default:
    break;
  }
  return Factor;
}


/// Return the first subtype of the result type of the deinterleave intrinsic
/// \p DI, cast to a VectorType. Asserts that \p DI is a deinterleave
/// intrinsic whose factor equals the number of result subtypes.
VectorType *llvm::getDeinterleavedVectorType(IntrinsicInst *DI) {
  ArrayRef<Type *> ResultFields = DI->getType()->subtypes();
  [[maybe_unused]] unsigned Factor =
      getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
  assert(Factor && Factor == ResultFields.size() &&
         "unexpected deinterleave factor or result type");
  return cast<VectorType>(ResultFields.front());
}


/// Given the vector \p V and the element number \p EltNo, try to find a value
/// that already holds that scalar element (for example because it was
/// inserted into the vector earlier). Returns nullptr when it is not known.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  auto *VecTy = cast<VectorType>(V->getType());

  // Reading past the end of a fixed-length vector yields poison.
  if (auto *FixedTy = dyn_cast<FixedVectorType>(VecTy))
    if (EltNo >= FixedTy->getNumElements())
      return PoisonValue::get(FixedTy->getElementType());

  // Constants can simply be indexed.
  if (auto *ConstVec = dyn_cast<Constant>(V))
    return ConstVec->getAggregateElement(EltNo);

  if (auto *Insert = dyn_cast<InsertElementInst>(V)) {
    // An insert at a non-constant position could have written any element.
    uint64_t InsertedAt;
    if (!match(Insert->getOperand(2), m_ConstantInt(InsertedAt)))
      return nullptr;

    // The insert wrote exactly the element being asked for.
    if (InsertedAt == EltNo)
      return Insert->getOperand(1);

    // Malformed, unreachable IR may use an insert as its own vector input;
    // do not recurse forever on it.
    Value *BaseVec = Insert->getOperand(0);
    if (BaseVec == Insert)
      return nullptr;

    // The insert left the requested element alone; look through it.
    return findScalarElement(BaseVec, EltNo);
  }

  // Shuffles are only looked through for fixed-length vectors.
  if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(V);
      Shuffle && isa<FixedVectorType>(Shuffle->getType())) {
    unsigned FirstSrcWidth =
        cast<FixedVectorType>(Shuffle->getOperand(0)->getType())
            ->getNumElements();
    int SrcElt = Shuffle->getMaskValue(EltNo);
    if (SrcElt < 0)
      return PoisonValue::get(VecTy->getElementType());
    if (SrcElt < (int)FirstSrcWidth)
      return findScalarElement(Shuffle->getOperand(0), SrcElt);
    return findScalarElement(Shuffle->getOperand(1), SrcElt - FirstSrcWidth);
  }

  // Look through a vector add whose constant operand is zero at EltNo.
  // TODO: Use getBinOpIdentity() to generalize this.
  Value *Addend;
  Constant *ConstOp;
  if (match(V, m_Add(m_Value(Addend), m_Constant(ConstOp)))) {
    Constant *ConstElt = ConstOp->getAggregateElement(EltNo);
    if (ConstElt && ConstElt->isNullValue())
      return findScalarElement(Addend, EltNo);
  }

  // A scalable splat gives the splatted value for any element below the
  // known minimum element count.
  if (isa<ScalableVectorType>(VecTy) &&
      EltNo < VecTy->getElementCount().getKnownMinValue())
    if (Value *SplatVal = getSplatValue(V))
      return SplatVal;

  // Nothing else is recognised.
  return nullptr;
}


/// If every non-negative element of \p Mask has the same value, return that
/// value. Return -1 when the non-negative elements disagree or when there are
/// none. Negative (undefined) elements are ignored.
int llvm::getSplatIndex(ArrayRef<int> Mask) {
  int Candidate = -1;
  for (int Elt : Mask) {
    // Undefined elements place no constraint on the splat.
    if (Elt < 0)
      continue;

    // The first defined element fixes the candidate index.
    if (Candidate == -1) {
      Candidate = Elt;
      continue;
    }

    // Any later defined element must agree with it.
    if (Candidate != Elt)
      return -1;
  }
  assert((Candidate == -1 || Candidate >= 0) && "Negative index?");
  return Candidate;
}


/// Return the splatted scalar if \p V is a splat vector, nullptr otherwise.
/// Only two forms are recognised: (1) a constant vector, for which
/// Constant::getSplatValue decides, and (2) an insertelement at index 0
/// broadcast by a shufflevector with an all-zero mask.
Value *llvm::getSplatValue(const Value *V) {
  const auto *ConstVec = dyn_cast<Constant>(V);
  if (ConstVec && isa<VectorType>(ConstVec->getType()))
    return ConstVec->getSplatValue();

  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
  Value *Broadcast = nullptr;
  const bool IsBroadcast = match(
      V, m_Shuffle(m_InsertElt(m_Value(), m_Value(Broadcast), m_ZeroInt()),
                   m_Value(), m_ZeroMask()));
  return IsBroadcast ? Broadcast : nullptr;
}


bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");


  if (isa<VectorType>(V->getType())) {

    if (isa<UndefValue>(V))

      return true;

    // FIXME: We can allow undefs, but if Index was specified, we may want to

    //        check that the constant is defined at that index.

    if (auto *C = dyn_cast<Constant>(V))

      return C->getSplatValue() != nullptr;

  }


  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {

    // FIXME: We can safely allow undefs here. If Index was specified, we will

    //        check that the mask elt is defined at the required index.

    if (!all_equal(Shuf->getShuffleMask()))

      return false;


    // Match any index.

    if (Index == -1)

      return true;


    // Match a specific element. The mask should be defined at and match the

    // specified index.

    return Shuf->getMaskValue(Index) == Index;

  }


  // The remaining tests are all recursive, so bail out if we hit the limit.

  if (Depth++ == MaxAnalysisRecursionDepth)

    return false;


  // If both operands of a binop are splats, the result is a splat.

  Value *X, *Y, *Z;

  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))

    return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);


  // If all operands of a select are splats, the result is a splat.

  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))

    return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&

           isSplatValue(Z, Index, Depth);


  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).


  return false;

}


/// Translate the demanded result elements \p DemandedElts of a shuffle with
/// mask \p Mask into the demanded elements of its two sources, each
/// \p SrcWidth elements wide, written to \p DemandedLHS and \p DemandedRHS.
/// Returns false if a demanded element has a negative (undef) mask value and
/// \p AllowUndefElts is false; returns true otherwise.
bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                  const APInt &DemandedElts, APInt &DemandedLHS,
                                  APInt &DemandedRHS, bool AllowUndefElts) {
  DemandedRHS = APInt::getZero(SrcWidth);
  DemandedLHS = DemandedRHS;

  // Nothing demanded from the result means nothing demanded from the sources.
  if (DemandedElts.isZero())
    return true;

  // An all-zero mask only ever reads element 0 of the first source.
  if (all_of(Mask, equal_to(0))) {
    DemandedLHS.setBit(0);
    return true;
  }

  for (unsigned Lane = 0, NumLanes = Mask.size(); Lane != NumLanes; ++Lane) {
    const int SrcElt = Mask[Lane];
    assert((-1 <= SrcElt) && (SrcElt < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");

    if (!DemandedElts[Lane])
      continue;

    if (SrcElt < 0) {
      // A demanded undef lane is skipped when undefs are allowed; otherwise
      // nothing can be said about the shuffle result.
      if (AllowUndefElts)
        continue;
      return false;
    }

    if (SrcElt >= SrcWidth)
      DemandedRHS.setBit(SrcElt - SrcWidth);
    else
      DemandedLHS.setBit(SrcElt);
  }

  return true;
}


/// Check whether every defined element of \p Mask matches one of at most two
/// (source, offset) pairs, where source is 0 or 1 depending on whether the
/// mask value is below \p NumElts, and offset is the lane index minus the
/// mask value modulo \p NumElts. The pairs found are stored in \p SrcInfo;
/// an unused slot keeps source -1 and offset 2 * NumElts. Returns false if a
/// third distinct pair is needed or if the mask has no defined element.
bool llvm::isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,
                             std::array<std::pair<int, int>, 2> &SrcInfo) {
  const int UnusedOffset = NumElts * 2;
  SrcInfo.fill({-1, UnusedOffset});

  for (auto [Lane, Elt] : enumerate(Mask)) {
    // Undefined lanes match anything.
    if (Elt < 0)
      continue;

    const int Src = Elt >= NumElts ? 1 : 0;
    const int Offset = (int)Lane - (Elt % NumElts);

    bool Matched = false;
    for (std::pair<int, int> &Slot : SrcInfo) {
      // Claim the first unused slot for this (source, offset) pair.
      if (Slot.first == -1) {
        assert(Slot.second == UnusedOffset);
        Slot.first = Src;
        Slot.second = Offset;
      }
      if (Slot.first == Src && Slot.second == Offset) {
        Matched = true;
        break;
      }
    }
    if (!Matched)
      return false;
  }

  // Avoid all undef masks
  return SrcInfo[0].first != -1;
}


/// Rewrite \p Mask for elements that are \p Scale times narrower: each
/// non-negative element M expands to Scale*M, Scale*M+1, ...,
/// Scale*M+Scale-1, and each negative element is repeated \p Scale times.
/// The result replaces the contents of \p ScaledMask.
void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                 SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // With a scale of 1 the mask is copied unchanged.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return;
  }

  ScaledMask.clear();
  for (int Elt : Mask) {
    if (Elt < 0) {
      // Negative values are replicated as-is.
      for (int Copy = 0; Copy != Scale; ++Copy)
        ScaledMask.push_back(Elt);
      continue;
    }

    assert(((uint64_t)Scale * Elt + (Scale - 1)) <= INT32_MAX &&
           "Overflowed 32-bits");
    for (int Offset = 0; Offset != Scale; ++Offset)
      ScaledMask.push_back(Scale * Elt + Offset);
  }
}


/// Try to rewrite \p Mask for elements that are \p Scale times wider.
///
/// The mask is processed in slices of \p Scale elements. A slice converts
/// when it is either one negative value repeated \p Scale times (kept as that
/// value) or a run of consecutive values starting at a multiple of \p Scale
/// (which becomes start / Scale).
///
/// \param Scale       widening factor; must be positive.
/// \param Mask        the mask to widen.
/// \param ScaledMask  receives the widened mask. On failure its contents are
///                    unspecified (it may hold a partial result).
/// \returns true if every slice converted. An empty \p Mask widens to an
///          empty \p ScaledMask, matching the two-argument overload.
bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return true;
  }

  // We must map the original elements down evenly to a type with fewer
  // elements.
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)
    return false;

  ScaledMask.clear();
  ScaledMask.reserve(NumElts / Scale);

  // Step through the input mask by splitting into Scale-sized slices. This is
  // a pre-checked loop so that an empty mask (which passes the divisibility
  // test above) never reaches MaskSlice.front() on an empty slice.
  while (!Mask.empty()) {
    ArrayRef<int> MaskSlice = Mask.take_front(Scale);
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");

    // The first element of the slice determines how we evaluate this slice.
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      // Negative values (undef or other "sentinel" values) must be equal across
      // the entire slice.
      if (!all_equal(MaskSlice))
        return false;
      ScaledMask.push_back(SliceFront);
    } else {
      // A positive mask element must be cleanly divisible.
      if (SliceFront % Scale != 0)
        return false;
      // Elements of the slice must be consecutive.
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)
          return false;
      ScaledMask.push_back(SliceFront / Scale);
    }
    Mask = Mask.drop_front(Scale);
  }

  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");

  // All elements of the original mask can be scaled down to map to the elements
  // of a mask with wider elements.
  return true;
}


/// Try to widen the mask \p M by a factor of two, pairing adjacent elements
/// and allowing either element of a pair to be -1 (undef). On success
/// \p NewMask holds one element per pair and true is returned. On failure
/// (odd length, or a pair that cannot be merged) false is returned; in the
/// pair-mismatch case \p NewMask is cleared first.
bool llvm::widenShuffleMaskElts(ArrayRef<int> M,
                                SmallVectorImpl<int> &NewMask) {
  unsigned NumElts = M.size();
  if (NumElts % 2 != 0)
    return false;

  NewMask.clear();
  for (unsigned Pos = 0; Pos < NumElts; Pos += 2) {
    const int Lo = M[Pos];
    const int Hi = M[Pos + 1];

    bool Mergeable;
    int Wide;
    if (Lo == -1) {
      // Undef low half: the pair is undef if the high half is undef too,
      // otherwise the high half must be the odd element of a wide element.
      Mergeable = Hi == -1 || (Hi % 2) == 1;
      Wide = Hi == -1 ? -1 : Hi / 2;
    } else {
      // Defined low half: it must be even, and the high half must either
      // follow it directly or be undef.
      Mergeable = (Lo % 2) == 0 && ((Lo + 1) == Hi || Hi == -1);
      Wide = Lo / 2;
    }

    if (!Mergeable) {
      NewMask.clear();
      return false;
    }
    NewMask.push_back(Wide);
  }

  assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
  return true;
}


/// Rescale \p Mask so that it has \p NumDstElts elements, writing the result
/// to \p ScaledMask. One of the two element counts must be a multiple of the
/// other. Narrowing always succeeds; widening returns whatever
/// widenShuffleMaskElts returns.
bool llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &ScaledMask) {
  const unsigned NumSrcElts = Mask.size();
  assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");

  // Same element count: the mask is copied unchanged.
  if (NumSrcElts == NumDstElts) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return true;
  }

  // Ensure we can find a whole scale factor.
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Unexpected scaling factor");

  if (NumSrcElts < NumDstElts) {
    // More destination elements: each source element splits into several.
    int NarrowBy = NumDstElts / NumSrcElts;
    narrowShuffleMaskElts(NarrowBy, Mask, ScaledMask);
    return true;
  }

  // Fewer destination elements: groups of source elements must merge.
  int WidenBy = NumSrcElts / NumDstElts;
  return widenShuffleMaskElts(WidenBy, Mask, ScaledMask);
}


/// Repeatedly widen \p Mask with widenShuffleMaskElts, trying each scale from
/// 2 upwards for as long as it keeps succeeding, and store the final mask in
/// \p ScaledMask. If no widening succeeds, \p ScaledMask is a copy of \p Mask.
void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                        SmallVectorImpl<int> &ScaledMask) {
  // Two scratch buffers used alternately: the one not referenced by Widest is
  // always the next output, so a widening never writes over its own input.
  std::array<SmallVector<int, 16>, 2> Scratch;
  unsigned Next = 0;
  ArrayRef<int> Widest = Mask;

  for (unsigned Scale = 2; Scale <= Widest.size(); ++Scale) {
    while (widenShuffleMaskElts(Scale, Widest, Scratch[Next])) {
      Widest = Scratch[Next];
      Next ^= 1u;
    }
  }

  ScaledMask.assign(Widest.begin(), Widest.end());
}


/// Split the shuffle mask \p Mask across registers and report, per
/// destination register, which source registers feed it.
///
/// The mask is cut into \p NumOfDestRegs chunks of Mask.size() / NumOfDestRegs
/// elements. For each chunk a sub-mask is built per source register; there
/// are 2 * NumOfSrcRegs source slots, the first NumOfSrcRegs for mask values
/// below Mask.size() and the remaining ones for values at or above it. Mask
/// elements equal to PoisonMaskElem or not below 2 * Mask.size() are skipped.
///
/// Then, for each of the first \p NumOfUsedRegs destination registers:
///  - no source register used: \p NoInputAction() is called;
///  - exactly one: \p SingleInputAction(SubMask, SrcRegIdx, DestRegIdx);
///  - more than one: the sub-masks are merged pairwise and
///    \p ManyInputsAction(CombinedMask, FirstIdx, SecondIdx, NewReg) is called
///    for every merge; NewReg is true only for the first merge of that
///    destination register.
void llvm::processShuffleMasks(

    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,

    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,

    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,

    function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>

        ManyInputsAction) {

  // Res[DestReg][SrcSlot] is the sub-mask taking elements of SrcSlot into
  // DestReg; an empty sub-mask means that source slot is unused.
  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);

  // Try to perform better estimation of the permutation.

  // 1. Split the source/destination vectors into real registers.

  // 2. Do the mask analysis to identify which real registers are

  // permuted.

  int Sz = Mask.size();

  unsigned SzDest = Sz / NumOfDestRegs;

  unsigned SzSrc = Sz / NumOfSrcRegs;

  for (unsigned I = 0; I < NumOfDestRegs; ++I) {

    auto &RegMasks = Res[I];

    RegMasks.assign(2 * NumOfSrcRegs, {});

    // Check that the values in dest registers are in the one src

    // register.

    for (unsigned K = 0; K < SzDest; ++K) {

      int Idx = I * SzDest + K;

      if (Idx == Sz)

        break;

      // Skip poison elements and values beyond both source operands.
      if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)

        continue;

      int MaskIdx = Mask[Idx] % Sz;

      // Values at or above Sz select from the second group of source slots.
      int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);

      // First use of this source slot for the current destination register:

      // start from an all-poison sub-mask.

      if (RegMasks[SrcRegIdx].empty())

        RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);

      RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;

    }

  }

  // Process split mask.

  for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {

    auto &Dest = Res[I];

    // Number of source slots that contribute to this destination register.
    int NumSrcRegs =

        count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });

    switch (NumSrcRegs) {

    case 0:

      // No input vectors were used!

      NoInputAction();

      break;

    case 1: {

      // Find the only non-empty sub-mask, i.e. the single source slot used.

      auto *It =

          find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });

      unsigned SrcReg = std::distance(Dest.begin(), It);

      SingleInputAction(*It, SrcReg, I);

      break;

    }

    default: {

      // The first mask is a permutation of a single register. Since we have >2

      // input registers to shuffle, we merge the masks for 2 first registers

      // and generate a shuffle of 2 registers rather than the reordering of the

      // first register and then shuffle with the second register. Next,

      // generate the shuffles of the resulting register + the remaining

      // registers from the list.

      // Merge SecondMask into FirstMask: every defined element of SecondMask
      // is written into FirstMask offset by the mask length (VF).
      auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,

                               ArrayRef<int> SecondMask) {

        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {

          if (SecondMask[Idx] != PoisonMaskElem) {

            assert(FirstMask[Idx] == PoisonMaskElem &&

                   "Expected undefined mask element.");

            FirstMask[Idx] = SecondMask[Idx] + VF;

          }

        }

      };

      // Turn every defined element of Mask into its own position (identity).
      auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {

        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {

          if (Mask[Idx] != PoisonMaskElem)

            Mask[Idx] = Idx;

        }

      };

      int SecondIdx;

      bool NewReg = true;

      // Each pass merges the non-empty sub-masks pairwise; passes repeat until
      // a pass finds nothing left to merge (SecondIdx stays -1).
      do {

        int FirstIdx = -1;

        SecondIdx = -1;

        MutableArrayRef<int> FirstMask, SecondMask;

        // NOTE: this I shadows the destination-register index of the
        // enclosing loop; here it iterates over the source slots.
        for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {

          SmallVectorImpl<int> &RegMask = Dest[I];

          if (RegMask.empty())

            continue;


          // FirstIdx == SecondIdx holds at the start of a pass and right after
          // a merge: the next non-empty sub-mask starts a new pair.
          if (FirstIdx == SecondIdx) {

            FirstIdx = I;

            FirstMask = RegMask;

            continue;

          }

          SecondIdx = I;

          SecondMask = RegMask;

          CombineMasks(FirstMask, SecondMask);

          ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);

          NewReg = false;

          NormalizeMask(FirstMask);

          // The second sub-mask has been folded into the first one.
          RegMask.clear();

          SecondMask = FirstMask;

          SecondIdx = FirstIdx;

        }

        // A trailing unpaired sub-mask is merged into the result of the last
        // pair formed in this pass.
        if (FirstIdx != SecondIdx && SecondIdx >= 0) {

          CombineMasks(SecondMask, FirstMask);

          ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);

          NewReg = false;

          Dest[FirstIdx].clear();

          NormalizeMask(SecondMask);

        }

      } while (SecondIdx >= 0);

      break;

    }

    }

  }

}


void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,

                                               const APInt &DemandedElts,

                                               APInt &DemandedLHS,

                                               APInt &DemandedRHS) {

  assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");

  int NumLanes = VectorBitWidth / 128;

  int NumElts = DemandedElts.getBitWidth();

  int NumEltsPerLane = NumElts / NumLanes;

  int HalfEltsPerLane = NumEltsPerLane / 2;


  DemandedLHS = APInt::getZero(NumElts);

  DemandedRHS = APInt::getZero(NumElts);


  // Map DemandedElts to the horizontal operands.

  for (int Idx = 0; Idx != NumElts; ++Idx) {

    if (!DemandedElts[Idx])

      continue;

    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;

    int LocalIdx = Idx % NumEltsPerLane;

    if (LocalIdx < HalfEltsPerLane) {

      DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);

    } else {

      LocalIdx -= HalfEltsPerLane;

      DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);

    }

  }

}


/// Compute, for instructions in \p Blocks, a bit width smaller than their
/// type in which they could be evaluated, based on the demanded bits reported
/// by \p DB.
///
/// The search starts from non-vector trunc and icmp instructions whose first
/// operand is at most 64 bits wide and walks up through operands, grouping
/// connected values into equivalence classes that must share one width. The
/// width of a class is the bit width of the union of its members' demanded
/// bits, rounded up to a power of two.
///
/// \param Blocks the basic blocks whose instructions are considered.
/// \param DB     demanded-bits analysis queried for every visited value.
/// \param TTI    optional; when given, truncs to a legal type are not used as
///               roots and nothing is computed unless a zext/sext from an
///               illegal type was seen.
/// \returns a map from instruction to its minimum width; empty if nothing
///          can be narrowed or if a visited value is wider than 64 bits.
MapVector<Instruction *, uint64_t>


llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,

                               const TargetTransformInfo *TTI) {


  // DemandedBits will give us every value's live-out bits. But we want

  // to ensure no extra casts would need to be inserted, so every DAG

  // of connected values must have the same minimum bitwidth.

  EquivalenceClasses<Value *> ECs;

  SmallVector<Instruction *, 16> Worklist;

  SmallPtrSet<Instruction *, 4> Roots;

  SmallPtrSet<Instruction *, 16> Visited;

  // Demanded bits per visited value; a class leader additionally accumulates
  // the bits of the values processed under it.
  DenseMap<Value *, uint64_t> DBits;

  // Every instruction contained in Blocks.
  SmallPtrSet<Instruction *, 4> InstructionSet;

  MapVector<Instruction *, uint64_t> MinBWs;


  // Determine the roots. We work bottom-up, from truncs or icmps.

  bool SeenExtFromIllegalType = false;

  for (auto *BB : Blocks)

    for (auto &I : *BB) {

      InstructionSet.insert(&I);


      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&

          !TTI->isTypeLegal(I.getOperand(0)->getType()))

        SeenExtFromIllegalType = true;


      // Only deal with non-vector integers up to 64-bits wide.

      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&

          !I.getType()->isVectorTy() &&

          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {

        // Don't make work for ourselves. If we know the truncated-to type is

        // legal, don't add it to the worklist.

        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))

          continue;


        Worklist.push_back(&I);

        Roots.insert(&I);

      }

    }

  // Early exit.

  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))

    return MinBWs;


  // Now process the worklist (popped from the back, i.e. in LIFO order),

  // unioning values together.

  while (!Worklist.empty()) {

    Instruction *I = Worklist.pop_back_val();

    Value *Leader = ECs.getOrInsertLeaderValue(I);


    if (!Visited.insert(I).second)

      continue;


    // If we encounter a type that is larger than 64 bits, we can't represent

    // it so bail out.

    if (DB.getDemandedBits(I).getBitWidth() > 64)

      return MapVector<Instruction *, uint64_t>();


    uint64_t V = DB.getDemandedBits(I).getZExtValue();

    DBits[Leader] |= V;

    DBits[I] = V;


    // Casts, loads and instructions outside of our range terminate a chain

    // successfully.

    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||

        !InstructionSet.count(I))

      continue;


    // Unsafe casts terminate a chain unsuccessfully. We can't do anything

    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to

    // transform anything that relies on them.

    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||

        !I->getType()->isIntegerTy()) {

      // Mark all 64 bits as demanded for this class.
      DBits[Leader] |= ~0ULL;

      continue;

    }


    // We don't modify the types of PHIs. Reductions will already have been

    // truncated if possible, and inductions' sizes will have been chosen by

    // indvars.

    if (isa<PHINode>(I))

      continue;


    // Don't modify the types of operands of a call, as doing that would cause a

    // signature mismatch.

    if (isa<CallBase>(I))

      continue;


    if (DBits[Leader] == ~0ULL)

      // All bits demanded, no point continuing.

      continue;


    // Put every operand into I's class and keep walking up through the
    // operands that are instructions.
    for (Value *O : I->operands()) {

      ECs.unionSets(Leader, O);

      if (auto *OI = dyn_cast<Instruction>(O))

        Worklist.push_back(OI);

    }

  }


  // Now we've discovered all values, walk them to see if there are

  // any users we didn't see. If there are, we can't optimize that

  // chain.

  for (auto &I : DBits)

    for (auto *U : I.first->users())

      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)

        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;


  // Compute one minimum width per equivalence class and record it for each
  // member instruction that can use it.
  for (const auto &E : ECs) {

    if (!E->isLeader())

      continue;

    uint64_t LeaderDemandedBits = 0;

    for (Value *M : ECs.members(*E))

      LeaderDemandedBits |= DBits[M];


    uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);

    // Round up to a power of 2

    MinBW = llvm::bit_ceil(MinBW);


    // We don't modify the types of PHIs. Reductions will already have been

    // truncated if possible, and inductions' sizes will have been chosen by

    // indvars.

    // If we are required to shrink a PHI, abandon this entire equivalence class.

    bool Abort = false;

    for (Value *M : ECs.members(*E))

      if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {

        Abort = true;

        break;

      }

    if (Abort)

      continue;


    for (Value *M : ECs.members(*E)) {

      auto *MI = dyn_cast<Instruction>(M);

      if (!MI)

        continue;

      // For a root (trunc/icmp) the width that matters is that of its first
      // operand, not of its result.
      Type *Ty = M->getType();

      if (Roots.count(MI))

        Ty = MI->getOperand(0)->getType();


      // Nothing to gain if the type is already no wider than MinBW.
      if (MinBW >= Ty->getScalarSizeInBits())

        continue;


      // If any of M's operands demand more bits than MinBW then M cannot be

      // performed safely in MinBW.

      auto *Call = dyn_cast<CallBase>(MI);

      // For calls only the arguments are inspected.
      auto Ops = Call ? Call->args() : MI->operands();

      if (any_of(Ops, [&DB, MinBW](Use &U) {

            auto *CI = dyn_cast<ConstantInt>(U);

            // For constants shift amounts, check if the shift would result in

            // poison.

            if (CI &&

                isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&

                U.getOperandNo() == 1)

              return CI->uge(MinBW);

            uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());

            return bit_ceil(BW) > MinBW;

          }))

        continue;


      MinBWs[MI] = MinBW;

    }

  }


  return MinBWs;

}


/// Add all access groups in @p AccGroups to @p List.

template <typename ListT>


static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {

  // Interpret an access group as a list containing itself.

  if (AccGroups->getNumOperands() == 0) {

    assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");

    List.insert(AccGroups);

    return;

  }


  for (const auto &AccGroupListOp : AccGroups->operands()) {

    auto *Item = cast<MDNode>(AccGroupListOp.get());

    assert(isValidAsAccessGroup(Item) && "List item must be an access group");

    List.insert(Item);

  }

}


/// Compute the union of two access-group descriptors.
///
/// A null argument yields the other argument unchanged, and two identical
/// arguments are returned as-is. Otherwise the access groups of both arguments
/// are gathered into one set: no group yields null, a single group is returned
/// directly, and several groups are wrapped in a new list node.
MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
  // Trivial cases that need no new node.
  if (!AccGroups1)
    return AccGroups2;
  if (!AccGroups2 || AccGroups1 == AccGroups2)
    return AccGroups1;

  SmallSetVector<Metadata *, 4> Merged;
  addToAccessGroupList(Merged, AccGroups1);
  addToAccessGroupList(Merged, AccGroups2);

  switch (Merged.size()) {
  case 0:
    return nullptr;
  case 1:
    return cast<MDNode>(Merged.front());
  default:
    return MDNode::get(AccGroups1->getContext(), Merged.getArrayRef());
  }
}


/// Compute the access groups that \p Inst1 and \p Inst2 have in common.
///
/// If neither instruction may read or write memory the result is null. If
/// only one of them may, the access-group metadata of that instruction is
/// returned unchanged. Otherwise the result holds the groups of \p Inst1 that
/// also occur on \p Inst2: null when there are none, the group itself when
/// there is exactly one, and a new list node otherwise.
MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
                                    const Instruction *Inst2) {
  const bool Touches1 = Inst1->mayReadOrWriteMemory();
  const bool Touches2 = Inst2->mayReadOrWriteMemory();

  if (!Touches1)
    return Touches2 ? Inst2->getMetadata(LLVMContext::MD_access_group)
                    : nullptr;
  if (!Touches2)
    return Inst1->getMetadata(LLVMContext::MD_access_group);

  MDNode *Groups1 = Inst1->getMetadata(LLVMContext::MD_access_group);
  MDNode *Groups2 = Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!Groups1 || !Groups2)
    return nullptr;
  if (Groups1 == Groups2)
    return Groups1;

  // Put the second instruction's groups in a set so that membership tests do
  // not require rescanning its list.
  SmallPtrSet<Metadata *, 4> InSecond;
  addToAccessGroupList(InSecond, Groups2);

  SmallVector<Metadata *, 4> Shared;
  auto KeepIfShared = [&](MDNode *Group) {
    if (InSecond.count(Group))
      Shared.push_back(Group);
  };

  if (Groups1->getNumOperands() != 0) {
    // List node: test each listed group.
    for (const MDOperand &Op : Groups1->operands()) {
      auto *Group = cast<MDNode>(Op.get());
      assert(isValidAsAccessGroup(Group) && "List item must be an access group");
      KeepIfShared(Group);
    }
  } else {
    // Operand-less node: the node is itself the only group.
    assert(isValidAsAccessGroup(Groups1) && "Node must be an access group");
    KeepIfShared(Groups1);
  }

  if (Shared.empty())
    return nullptr;
  if (Shared.size() == 1)
    return cast<MDNode>(Shared.front());

  return MDNode::get(Inst1->getContext(), Shared);
}


/// Fill \p Metadata with the metadata of \p Inst that can be preserved after
/// vectorization.
///
/// All metadata of \p Inst other than the debug location is fetched first;
/// entries whose kind is not in the supported list below are then dropped.
/// A rejected entry is swapped with the last entry before being popped, so the
/// relative order of the surviving entries is not necessarily preserved.
void llvm::getMetadataToPropagate(
    Instruction *Inst,
    SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata) {
  Inst->getAllMetadataOtherThanDebugLoc(Metadata);
  static const unsigned SupportedIDs[] = {
      LLVMContext::MD_tbaa,         LLVMContext::MD_alias_scope,
      LLVMContext::MD_noalias,      LLVMContext::MD_fpmath,
      LLVMContext::MD_nontemporal,  LLVMContext::MD_invariant_load,
      LLVMContext::MD_access_group, LLVMContext::MD_mmra};

  // Filter in place. The cursor only advances past entries that are kept,
  // because a removal moves a not-yet-inspected entry into the current slot.
  unsigned Pos = 0;
  while (Pos != Metadata.size()) {
    const unsigned KindID = Metadata[Pos].first;
    if (is_contained(SupportedIDs, KindID)) {
      ++Pos;
      continue;
    }
    std::swap(Metadata[Pos], Metadata.back());
    Metadata.pop_back();
  }
}


/// Propagate metadata from the instructions in \p VL onto \p Inst.
///
/// The candidate metadata is taken from the first element of \p VL (restricted
/// to the kinds accepted by getMetadataToPropagate). Each candidate is then
/// combined with the same-kind metadata of every remaining element using a
/// kind-specific rule, stopping early once the combined node becomes null, and
/// the outcome is set on \p Inst.
///
/// \returns \p Inst (unchanged if \p VL is empty).
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
  if (VL.empty())
    return Inst;

  SmallVector<std::pair<unsigned, MDNode *>> Candidates;
  getMetadataToPropagate(cast<Instruction>(VL.front()), Candidates);

  for (auto &[Kind, MD] : Candidates) {
    // MMRA metadata is skipped entirely when Inst cannot carry it.
    if (Kind == LLVMContext::MD_mmra && !canInstructionHaveMMRAs(*Inst))
      continue;

    for (Value *Other : VL.drop_front()) {
      // Nothing is left to combine once the metadata has been dropped.
      if (!MD)
        break;

      const auto *OtherInst = cast<Instruction>(Other);
      MDNode *OtherMD = OtherInst->getMetadata(Kind);

      switch (Kind) {
      case LLVMContext::MD_mmra:
        MD = MMRAMetadata::combine(Inst->getContext(), MD, OtherMD);
        break;
      case LLVMContext::MD_tbaa:
        MD = MDNode::getMostGenericTBAA(MD, OtherMD);
        break;
      case LLVMContext::MD_alias_scope:
        MD = MDNode::getMostGenericAliasScope(MD, OtherMD);
        break;
      case LLVMContext::MD_fpmath:
        MD = MDNode::getMostGenericFPMath(MD, OtherMD);
        break;
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
        MD = MDNode::intersect(MD, OtherMD);
        break;
      case LLVMContext::MD_access_group:
        // NOTE(review): unlike the other kinds, this combines the metadata
        // currently attached to Inst (not the running MD) with OtherInst's.
        // Confirm this is intended.
        MD = intersectAccessGroups(Inst, OtherInst);
        break;
      default:
        llvm_unreachable("unhandled metadata");
      }
    }

    Inst->setMetadata(Kind, MD);
  }

  return Inst;
}


/// Build an i1 constant vector that marks which lanes of a wide interleaved
/// access correspond to existing members of \p Group.
///
/// The per-factor pattern (1 where the group has a member at that index, 0
/// for a gap) is repeated \p VF times. Returns null for a full group, for
/// which no mask is needed. Reversed groups are not supported.
Constant *
llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                           const InterleaveGroup<Instruction> &Group) {
  // A full group would produce an all-ones mask, i.e. no mask at all.
  if (Group.isFull())
    return nullptr;

  // TODO: support reversed access.
  assert(!Group.isReverse() && "Reversed group not supported.");

  const unsigned Factor = Group.getFactor();
  SmallVector<Constant *, 16> Bits;
  for (unsigned Lane = 0; Lane != VF; ++Lane)
    for (unsigned Slot = 0; Slot != Factor; ++Slot)
      Bits.push_back(Builder.getInt1(Group.getMember(Slot) != nullptr));

  return ConstantVector::get(Bits);
}


/// Create a mask in which each index 0 .. VF-1 appears \p ReplicationFactor
/// times in a row, e.g. <0,0,0,1,1,1> for a factor of 3 and a \p VF of 2.
llvm::SmallVector<int, 16>
llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
  SmallVector<int, 16> Replicated;
  for (unsigned Lane = 0; Lane != VF; ++Lane)
    Replicated.append(ReplicationFactor, static_cast<int>(Lane));

  return Replicated;
}


/// Create a mask that interleaves the elements of \p NumVecs concatenated
/// vectors of \p VF elements each: for every element position, the indices of
/// that position in vector 0, 1, ..., NumVecs-1 are emitted in turn, e.g.
/// <0,4,1,5,2,6,3,7> for a \p VF of 4 and two vectors.
llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
                                                      unsigned NumVecs) {
  SmallVector<int, 16> Interleaved;
  for (unsigned Elt = 0; Elt != VF; ++Elt) {
    // Index of element Elt in the current vector; the same element of the
    // next vector is VF positions further on.
    unsigned Index = Elt;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec, Index += VF)
      Interleaved.push_back(Index);
  }

  return Interleaved;
}


/// Create a mask of \p VF indices that starts at \p Start and advances by
/// \p Stride each step, e.g. <0,2,4,6> for a start of 0, a stride of 2 and a
/// \p VF of 4.
llvm::SmallVector<int, 16>
llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
  SmallVector<int, 16> Strided;
  unsigned Index = Start;
  for (unsigned Lane = 0; Lane != VF; ++Lane, Index += Stride)
    Strided.push_back(Index);

  return Strided;
}


/// Create a mask of \p NumInts consecutive indices beginning at \p Start,
/// followed by \p NumUndefs entries of -1, e.g. <0,1,2,3,-1,-1> for a start of
/// 0, four indices and two trailing -1 entries.
llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
                                                      unsigned NumInts,
                                                      unsigned NumUndefs) {
  SmallVector<int, 16> Sequential;

  // The consecutive part.
  for (unsigned Index = Start, End = Start + NumInts; Index != End; ++Index)
    Sequential.push_back(Index);

  // The trailing -1 entries.
  Sequential.append(NumUndefs, -1);

  return Sequential;
}


/// Rewrite a two-operand shuffle mask so that it only refers to operand 0.
///
/// Every entry of \p Mask that is at least \p NumElts (i.e. selects from
/// operand 1) is reduced by \p NumElts; all other entries, including negative
/// ones, are copied unchanged.
llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
                                                 unsigned NumElts) {
  // Work with a signed width so the loop below needs no casts; the assert
  // also rejects values that do not fit a positive int.
  const int Width = NumElts;
  assert(Width > 0 && "Expected smaller or non-zero element count");

  SmallVector<int, 16> UnaryMask;
  for (int Elt : Mask) {
    assert((Elt < Width * 2) && "Expected valid shuffle mask");
    if (Elt >= Width)
      Elt -= Width;
    UnaryMask.push_back(Elt);
  }
  return UnaryMask;
}


/// Concatenate two fixed-width vectors with the same element type.
///
/// \p V1 must have at least as many elements as \p V2. When \p V2 is shorter
/// it is first widened to the length of \p V1 with a single-operand shuffle
/// whose extra mask entries are -1; the final two-operand shuffle then selects
/// the elements of \p V1 followed by the original elements of \p V2.
static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
                                    Value *V2) {
  auto *Ty1 = dyn_cast<VectorType>(V1->getType());
  auto *Ty2 = dyn_cast<VectorType>(V2->getType());
  assert(Ty1 && Ty2 && Ty1->getScalarType() == Ty2->getScalarType() &&
         "Expect two vectors with the same element type");

  const unsigned Width1 = cast<FixedVectorType>(Ty1)->getNumElements();
  const unsigned Width2 = cast<FixedVectorType>(Ty2)->getNumElements();
  assert(Width1 >= Width2 && "Unexpect the first vector has less elements");

  Value *Rhs = V2;
  if (Width1 > Width2) {
    // Pad the shorter operand so both shuffle inputs have the same type.
    Rhs = Builder.CreateShuffleVector(
        V2, createSequentialMask(0, Width2, Width1 - Width2));
  }

  return Builder.CreateShuffleVector(
      V1, Rhs, createSequentialMask(0, Width1 + Width2, 0));
}


/// Concatenate the vectors in \p Vecs into one vector.
///
/// The list is reduced in rounds: each round joins neighbouring pairs with
/// concatenateTwoVectors and carries an unpaired last vector over unchanged,
/// until a single value remains. At least two vectors are expected, and within
/// a round only the last vector may differ in type from its partner.
Value *llvm::concatenateVectors(IRBuilderBase &Builder,
                                ArrayRef<Value *> Vecs) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> Current(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> Next;
    const unsigned LastIdx = NumVecs - 1;
    for (unsigned i = 0; i < LastIdx; i += 2) {
      Value *Lhs = Current[i];
      Value *Rhs = Current[i + 1];
      assert((Lhs->getType() == Rhs->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");

      Next.push_back(concatenateTwoVectors(Builder, Lhs, Rhs));
    }

    // With an odd count the final vector has no partner in this round.
    if (NumVecs % 2 != 0)
      Next.push_back(Current[LastIdx]);

    Current = std::move(Next);
    NumVecs = Current.size();
  } while (NumVecs > 1);

  return Current.front();
}


/// Return true if \p Mask (a vector of i1) matches the all-ones or undef
/// pattern as a whole, or contains a vector element matching that pattern.
bool llvm::maskContainsAllOneOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         Mask->getType()->getScalarType()->isIntegerTy(1) &&
         "Mask must be a vector of i1");

  auto OneOrUndef = m_CombineOr(m_AllOnes(), m_UndefValue());

  // Try the whole value first, then fall back to the element-wise matcher.
  if (match(Mask, OneOrUndef))
    return true;
  return match(Mask, m_ContainsMatchingVectorElement(OneOrUndef));
}


/// Compute which lanes of the fixed-width i1 vector \p Mask may be enabled.
///
/// The result has one bit per lane and starts out all-ones. Only when \p Mask
/// is a ConstantVector are bits cleared, namely for lanes whose element is a
/// null value; every other kind of mask keeps the all-ones answer.
///
/// TODO: This is a lot like known bits, but for vectors. Is there something
/// we can common this with?
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
  assert(isa<FixedVectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a fixed width vector of i1");

  const unsigned NumLanes =
      cast<FixedVectorType>(Mask->getType())->getNumElements();
  APInt Demanded = APInt::getAllOnes(NumLanes);

  auto *ConstMask = dyn_cast<ConstantVector>(Mask);
  if (!ConstMask)
    return Demanded;

  for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
    if (ConstMask->getAggregateElement(Lane)->isNullValue())
      Demanded.clearBit(Lane);

  return Demanded;
}


/// Return true if \p Stride can serve as an interleave factor, i.e. its
/// magnitude is at least 2 and at most MaxInterleaveGroupFactor.
bool InterleavedAccessInfo::isStrided(int Stride) {
  // Take the magnitude in unsigned arithmetic. std::abs(Stride) is undefined
  // for the most negative int, whereas unsigned negation is always well
  // defined; for every other value the result equals std::abs(Stride).
  unsigned Factor = static_cast<unsigned>(Stride);
  if (Stride < 0)
    Factor = 0u - Factor;
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}


/// Record a StrideDescriptor for the loads and stores of the loop.
///
/// Blocks are visited in reverse postorder so that \p AccessStrideInfo lists
/// the accesses in program order. For each access the descriptor holds the
/// stride returned by getPtrStride (0 when none is returned), the pointer SCEV
/// after symbolic-stride replacement, the alloc size of the accessed type and
/// the access alignment. Accesses whose type size differs from its alloc size
/// are skipped. \p Strides and \p Predicates are forwarded to the helpers.
void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const DenseMap<Value *, const SCEV *> &Strides,
    SmallVectorImpl<const SCEVPredicate *> *Predicates) {
  const auto &Layout = TheLoop->getHeader()->getDataLayout();

  // The interleaved access analysis wants the loads/stores in "program
  // order". Walking the loop blocks in reverse postorder (a topological
  // order) ensures that any access that may execute before another one also
  // precedes it in AccessStrideInfo.
  LoopBlocksDFS BlockOrder(TheLoop);
  BlockOrder.perform(LI);
  for (BasicBlock *Block :
       make_range(BlockOrder.beginRPO(), BlockOrder.endRPO())) {
    for (Instruction &Inst : *Block) {
      Value *Addr = getLoadStorePointerOperand(&Inst);
      if (!Addr)
        continue; // Not a load or store.

      Type *AccessTy = getLoadStoreType(&Inst);

      // Codegen currently does not support types whose size differs from
      // their alloc size, so such accesses are skipped for now.
      uint64_t AllocBytes = Layout.getTypeAllocSize(AccessTy);
      if (AllocBytes * 8 != Layout.getTypeSizeInBits(AccessTy))
        continue;

      // Wrapping is deliberately not checked here: it is not yet known
      // whether this pointer ends up in a full group or in a group with gaps,
      // and checking every pointer would be overly conservative. For full
      // groups wrapping is fine, since wrapping around the address space would
      // imply a memory access at nullptr even without the transformation. The
      // wrapping checks are deferred until the groups have been formed.
      // NOTE(review): the argument labels of this call differ from those used
      // at the getPtrStride call in analyzeInterleaving -- confirm against the
      // current getPtrStride signature.
      int64_t Stride = getPtrStride(PSE, AccessTy, Addr, TheLoop, *DT, Strides,
                                    /*ShouldCheckWrap=*/false, Predicates)
                           .value_or(0);

      const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Addr);
      AccessStrideInfo[&Inst] = StrideDescriptor(Stride, PtrScev, AllocBytes,
                                                 getLoadStoreAlignment(&Inst));
    }
  }
}


// Analyze interleaved accesses and collect them into interleaved load and

// store groups.

//

// When generating code for an interleaved load group, we effectively hoist all

// loads in the group to the location of the first load in program order. When

// generating code for an interleaved store group, we sink all stores to the

// location of the last store. This code motion can change the order of load

// and store instructions and may break dependences.

//

// The code generation strategy mentioned above ensures that we won't violate

// any write-after-read (WAR) dependences.

//

// E.g., for the WAR dependence:  a = A[i];      // (1)

//                                A[i] = b;      // (2)

//

// The store group of (2) is always inserted at or below (2), and the load

// group of (1) is always inserted at or above (1). Thus, the instructions will

// never be reordered. All other dependences are checked to ensure the

// correctness of the instruction reordering.

//

// The algorithm visits all memory accesses in the loop in bottom-up program

// order. Program order is established by traversing the blocks in the loop in

// reverse postorder when collecting the accesses.

//

// We visit the memory accesses in bottom-up order because it can simplify the

// construction of store groups in the presence of write-after-write (WAW)

// dependences.

//

// E.g., for the WAW dependence:  A[i] = a;      // (1)

//                                A[i] = b;      // (2)

//                                A[i + 1] = c;  // (3)

//

// We will first create a store group with (3) and (2). (1) can't be added to

// this group because it and (2) are dependent. However, (1) can be grouped

// with other accesses that may precede it in program order. Note that a

// bottom-up order does not imply that WAW dependences should not be checked.


// \p EnablePredicatedInterleavedMemAccesses: when false, an access in a
// predicated block neither starts nor joins a group, and store groups with
// gaps are released at the end. When true, predicated accesses may be grouped
// provided all members are in the same block.
void InterleavedAccessInfo::analyzeInterleaving(
                                 bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const auto &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  // Predicates gathered while collecting the accesses. No predicate list is
  // passed (nullptr) under OptForSize. They are only committed to PSE further
  // down, once at least one candidate group exists.
  SmallVector<const SCEVPredicate *> Predicates;
  collectConstStrideAccesses(AccessStrideInfo, Strides,
                             OptForSize ? nullptr : &Predicates);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
  // Groups added to this set cannot have new members added.
  SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup<Instruction> *GroupB = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
      // B may already belong to a group formed in an earlier outer iteration
      // (when it played the role of A); only create a group if it does not.
      GroupB = getInterleaveGroup(B);
      if (!GroupB) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
        if (B->mayWriteToMemory())
          StoreGroups.insert(GroupB);
        else
          LoadGroups.insert(GroupB);
      }
    }

    // Visit every access A that precedes B in program order.
    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a;  // (1)
      //                    | A[i-1] = b;  // (2) |
      //                      A[i-3] = c;  // (3)
      //                      A[i]   = d;  // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.

      // Returns the first member of Group (by index) that cannot be reordered
      // with the access A, or nullptr if every member can.
      auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
                                 StrideEntry *A) -> Instruction * {
        for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
          Instruction *MemberOfGroupB = Group->getMember(Index);
          if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
                                    A, &*AccessStrideInfo.find(MemberOfGroupB)))
            return MemberOfGroupB;
        }
        return nullptr;
      };

      auto GroupA = getInterleaveGroup(A);
      // If A is a load, dependencies are tolerable, there's nothing to do here.
      // If both A and B belong to the same (store) group, they are independent,
      // even if dependencies have not been recorded.
      // If both GroupA and GroupB are null, there's nothing to do here.
      if (A->mayWriteToMemory() && GroupA != GroupB) {
        Instruction *DependentInst = nullptr;
        // If GroupB is a load group, we have to compare AI against all
        // members of GroupB because if any load within GroupB has a dependency
        // on AI, we need to mark GroupB as complete and also release the
        // store GroupA (if A belongs to one). The former prevents incorrect
        // hoisting of load B above store A while the latter prevents incorrect
        // sinking of store A below load B.
        if (GroupB && LoadGroups.contains(GroupB))
          DependentInst = DependentMember(GroupB, &*AI);
        else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
          DependentInst = B;

        if (DependentInst) {
          // A has a store dependence on B (or on some load within GroupB) and
          // is part of a store group. Release A's group to prevent illegal
          // sinking of A below B. A will then be free to form another group
          // with instructions that precede it.
          if (GroupA && StoreGroups.contains(GroupA)) {
            LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
                                 "dependence between "
                              << *A << " and " << *DependentInst << '\n');
            StoreGroups.remove(GroupA);
            releaseGroup(GroupA);
          }
          // If B is a load and part of an interleave group, no earlier loads
          // can be added to B's interleave group, because this would mean the
          // DependentInst would move across store A. Mark the interleave group
          // as complete.
          if (GroupB && LoadGroups.contains(GroupB)) {
            LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
                              << " as complete.\n");
            CompletedLoadGroups.insert(GroupB);
          }
        }
      }
      if (CompletedLoadGroups.contains(GroupB)) {
        // Skip trying to add A to B, continue to look for other conflicting A's
        // in groups to be released.
        continue;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive to
      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
      // here, canVectorizeMemory() should have returned false - except for the
      // case we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory object of A and B don't belong to the same
      // address space
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      // A distance that is not a compile-time constant cannot be turned into
      // a member index.
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same predicate,
      // and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      // GroupB is non-null here: B is strided (checked above), and the
      // predication check just above rejects the only remaining case in which
      // no group was set up for B (B predicated while predicated interleaving
      // is disabled).
      int IndexA =
          GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = GroupB;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          GroupB->setInsertPos(A);
      }
    } // Iteration over A accesses.
  }   // Iteration over B accesses.

  // Commit the collected predicates to PSE if any candidate group was formed.
  if (!LoadGroups.empty() || !StoreGroups.empty())
    PSE.addPredicates(Predicates);

  // Re-runs getPtrStride for the member of Group at Index. If it yields a
  // non-zero stride the group is kept and false is returned; otherwise the
  // group is released and true is returned. FirstOrLast is only used in the
  // debug message.
  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
                                            int Index,
                                            const char *FirstOrLast) -> bool {
    Instruction *Member = Group->getMember(Index);
    assert(Member && "Group member does not exist");
    Value *MemberPtr = getLoadStorePointerOperand(Member);
    Type *AccessTy = getLoadStoreType(Member);
    // NOTE(review): the argument labels here (/*Assume=*/, /*ShouldCheckWrap=*/)
    // differ from those at the getPtrStride call in collectConstStrideAccesses
    // (/*ShouldCheckWrap=*/false, Predicates) -- confirm both against the
    // current getPtrStride signature.
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true)
            .value_or(0))
      return false;
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
    return true;
  };

  // Remove interleaved groups with gaps whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer doesn't
  // wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. When we'll change to use Assume=true
  // we'll only need at most one runtime check per interleaved group.
  for (auto *Group : LoadGroups) {
    // Case 1: A full group. Can Skip the checks; For full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->isFull())
      continue;

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and group member Factor - 1; If the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
      continue;
    if (Group->getMember(Group->getFactor() - 1))
      InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
    else {
      // Case 3: A non-reversed interleaved load group with gaps: We need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }

  for (auto *Group : StoreGroups) {
    // Case 1: A full group. Can Skip the checks; For full groups, if the wide
    // store would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->isFull())
      continue;

    // Interleave-store-group with gaps is implemented using masked wide store.
    // Remove interleaved store groups with gaps if
    // masked-interleaved-accesses are not enabled by the target.
    if (!EnablePredicatedInterleavedMemAccesses) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
      continue;
    }

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and the last group member. Case 3 (scalar epilog) is not relevant for
    // stores with gaps, which are implemented with masked-store (rather than
    // speculative access, as in loads).
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
      continue;
    // Scan downwards from the highest index for the last existing member and
    // check only that one.
    for (int Index = Group->getFactor() - 1; Index > 0; Index--)
      if (Group->getMember(Index)) {
        InvalidateGroupIfMemberMayWrap(Group, Index, "last");
        break;
      }
  }
}


/// Release every interleave group that requires a scalar epilogue and clear
/// the RequiresScalarEpilogue flag. Does nothing if no scalar epilogue is
/// currently required.
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
  // No group has asked for an epilogue loop, so there is nothing to undo.
  if (!requiresScalarEpilogue())
    return;

  // Predicate for remove_if: releases a group that needs a scalar epilogue
  // and reports whether it did. remove_if itself erases the group from
  // InterleaveGroups, hence the "WithoutRemovingFromSet" release variant.
  auto ReleaseIfNeedsEpilogue = [&](auto *Candidate) {
    const bool NeedsEpilogue = Candidate->requiresScalarEpilogue();
    if (NeedsEpilogue) {
      LLVM_DEBUG(
          dbgs()
          << "LV: Invalidate candidate interleaved group due to gaps that "
             "require a scalar epilogue (not allowed under optsize) and cannot "
             "be masked (not enabled). \n");
      releaseGroupWithoutRemovingFromSet(Candidate);
    }
    return NeedsEpilogue;
  };

  bool ReleasedGroup = InterleaveGroups.remove_if(ReleaseIfNeedsEpilogue);
  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup; // Only read by the assert above.
  RequiresScalarEpilogue = false;
}


/// Generic definition of InterleaveGroup::addMetadata. It must never be
/// executed: reaching it triggers llvm_unreachable. Only the Instruction
/// specialization defined below in this file provides real behaviour.
template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  llvm_unreachable("addMetadata can only be used for Instruction");
}


namespace llvm {
/// Instruction specialization: propagate the metadata of all group members
/// onto \p NewInst via propagateMetadata.
template <>
void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
  // Gather the member instructions (the mapped values of Members) as plain
  // Value pointers, which is the form propagateMetadata expects.
  SmallVector<Value *, 4> MemberValues;
  for (Instruction *Member : make_second_range(Members))
    MemberValues.push_back(Member);
  propagateMetadata(NewInst, MemberValues);
}

} // namespace llvm

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

X
#define X(NUM, ENUM, NAME)
Definition ELF.h:856

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CommandLine.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

DemandedBits.h

DerivedTypes.h

EquivalenceClasses.h
Generic implementation of equivalence classes through the use of Tarjan's efficient union-find algorithm...

IRBuilder.h

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

Value.h

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3391

LoopInfo.h

LoopIterator.h

I
#define I(x, y, z)
Definition MD5.cpp:57

MemoryModelRelaxationAnnotations.h
This file provides utility for Memory Model Relaxation Annotations (MMRAs).

PatternMatch.h

ScalarEvolutionExpressions.h

ScalarEvolution.h

SmallVector.h
This file defines the SmallVector class.

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:119

getScalarSizeInBits
static unsigned getScalarSizeInBits(Type *Ty)
Definition SystemZTargetTransformInfo.cpp:531

Y
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

ValueTracking.h

concatenateTwoVectors
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
Definition VectorUtils.cpp:1211

MaxInterleaveGroupFactor
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.

addToAccessGroupList
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Definition VectorUtils.cpp:974

VectorUtils.h

Node
Definition ItaniumDemangle.h:166

VectorType
Definition ItaniumDemangle.h:1189

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235

llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429

llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353

llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381

llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511

llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201

llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::ArrayRef::front
const T & front() const
Get the first element.
Definition ArrayRef.h:144

llvm::ArrayRef::end
iterator end() const
Definition ArrayRef.h:130

llvm::ArrayRef::size
size_t size() const
Get the array size.
Definition ArrayRef.h:141

llvm::ArrayRef::begin
iterator begin() const
Definition ArrayRef.h:129

llvm::ArrayRef::empty
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition Instructions.h:1531

llvm::ConstantVector::get
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
Definition Constants.cpp:1542

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::DemandedBits
Definition DemandedBits.h:41

llvm::DenseMapBase::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221

llvm::DenseMap
Definition DenseMap.h:834

llvm::EquivalenceClasses
This represents a collection of equivalence classes and supports three efficient operations: insert a...
Definition EquivalenceClasses.h:62

llvm::EquivalenceClasses::members
iterator_range< member_iterator > members(const ECValue &ECV) const
Definition EquivalenceClasses.h:174

llvm::EquivalenceClasses::getOrInsertLeaderValue
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
Return the leader for the specified value that is in the set.
Definition EquivalenceClasses.h:198

llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
Merge the two equivalence sets for the specified values, inserting them if they do not already exist ...
Definition EquivalenceClasses.h:316

llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114

llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition Instructions.h:1888

llvm::Instruction
Definition Instruction.h:70

llvm::Instruction::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition Instruction.h:854

llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition Instruction.h:460

llvm::Instruction::setMetadata
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition Metadata.cpp:1751

llvm::Instruction::getAllMetadataOtherThanDebugLoc
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition Instruction.h:486

llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition VectorUtils.h:515

llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition VectorUtils.h:531

llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition VectorUtils.h:580

llvm::InterleaveGroup::isFull
bool isFull() const
Return true if this group is full, i.e. it has no gaps.
Definition VectorUtils.h:632

llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition VectorUtils.h:596

llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition VectorUtils.h:606

llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition VectorUtils.h:530

llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Definition VectorUtils.cpp:1713

llvm::InterleaveGroup::insertMember
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition VectorUtils.h:540

llvm::InterleavedAccessInfo::getInterleaveGroup
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
Definition VectorUtils.h:710

llvm::InterleavedAccessInfo::requiresScalarEpilogue
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
Definition VectorUtils.h:721

llvm::InterleavedAccessInfo::isInterleaved
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
Definition VectorUtils.h:702

llvm::InterleavedAccessInfo::analyzeInterleaving
LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition VectorUtils.cpp:1377

llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
LLVM_ABI void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition VectorUtils.cpp:1687

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition IntrinsicInst.h:49

llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition IntrinsicInst.h:56

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::MDNode
Metadata node.
Definition Metadata.h:1069

llvm::MDNode::getMostGenericAliasScope
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition Metadata.cpp:1150

llvm::MDNode::getMostGenericTBAA
static LLVM_ABI MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Definition TypeBasedAliasAnalysis.cpp:470

llvm::MDNode::operands
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1424

llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1554

llvm::MDNode::getMostGenericFPMath
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Definition Metadata.cpp:1182

llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1432

llvm::MDNode::intersect
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
Definition Metadata.cpp:1137

llvm::MDNode::getContext
LLVMContext & getContext() const
Definition Metadata.h:1233

llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition Metadata.h:891

llvm::MMRAMetadata::combine
static LLVM_ABI MDNode * combine(LLVMContext &Ctx, const MMRAMetadata &A, const MMRAMetadata &B)
Combines A and B according to MMRA semantics.
Definition MemoryModelRelaxationAnnotations.cpp:78

llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38

llvm::MapVector::find
iterator find(const KeyT &Key)
Definition MapVector.h:156

llvm::MapVector::empty
bool empty() const
Definition MapVector.h:79

llvm::MapVector::rend
reverse_iterator rend()
Definition MapVector.h:76

llvm::MapVector::rbegin
reverse_iterator rbegin()
Definition MapVector.h:72

llvm::Metadata
Root of the metadata hierarchy.
Definition Metadata.h:64

llvm::MutableArrayRef
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition Constants.cpp:2034

llvm::SCEVConstant
This class represents a constant integer value.
Definition ScalarEvolutionExpressions.h:62

llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition ScalarEvolutionExpressions.h:72

llvm::SetVector::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition SetVector.h:181

llvm::SetVector::contains
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
Definition SetVector.h:252

llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151

llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition Instructions.h:1955

llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition Instructions.h:2007

llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition Instructions.h:1998

llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition SmallPtrSet.h:460

llvm::SmallPtrSetImpl::remove_if
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
Definition SmallPtrSet.h:419

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:386

llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition SmallPtrSet.h:466

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition SmallPtrSet.h:532

llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:592

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition SmallVector.h:692

llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition SmallVector.h:729

llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition SmallVector.h:682

llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition SmallVector.h:702

llvm::SmallVectorImpl::clear
void clear()
Definition SmallVector.h:629

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:423

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:83

llvm::SmallVectorTemplateCommon::front
reference front()
Definition SmallVector.h:308

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:86

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1236

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:266

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:268

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Type::subtypes
ArrayRef< Type * > subtypes() const
Definition Type.h:381

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition Use.h:35

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:207

llvm::VPCastIntrinsic::isVPCast
static LLVM_ABI bool isVPCast(Intrinsic::ID ID)
Definition IntrinsicInst.cpp:725

llvm::VPIntrinsic::getVectorLengthParamPos
static LLVM_ABI std::optional< unsigned > getVectorLengthParamPos(Intrinsic::ID IntrinsicID)
Definition IntrinsicInst.cpp:410

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255

llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:490

llvm::VectorType::getElementType
Type * getElementType() const
Definition DerivedTypes.h:523

llvm::cl::opt
Definition CommandLine.h:1472

llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition STLFunctionalExtras.h:37

uint32_t

uint64_t

Call
CallInst * Call
Definition ObjCARCOpts.cpp:2356

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition Intrinsics.h:50

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::Intrinsic::isTriviallyScalarizable
LLVM_ABI bool isTriviallyScalarizable(ID id)
Returns true if the intrinsic is trivially scalarizable.

llvm::Intrinsic::isTargetIntrinsic
LLVM_ABI bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
Definition Intrinsics.cpp:669

llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
Definition MIPatternMatch.h:278

llvm::PatternMatchHelpers::m_CombineOr
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
Definition PatternMatchHelpers.h:56

llvm::PatternMatch
Definition PatternMatch.h:51

llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition PatternMatch.h:508

llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition PatternMatch.h:1169

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:53

llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition PatternMatch.h:1915

llvm::PatternMatch::m_BinOp
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition PatternMatch.h:141

llvm::PatternMatch::m_Value
auto m_Value()
Match an arbitrary value and ignore it.
Definition PatternMatch.h:135

llvm::PatternMatch::m_UndefValue
auto m_UndefValue()
Match an arbitrary UndefValue constant.
Definition PatternMatch.h:170

llvm::PatternMatch::m_Constant
auto m_Constant()
Match an arbitrary Constant and ignore it.
Definition PatternMatch.h:176

llvm::PatternMatch::m_ContainsMatchingVectorElement
ContainsMatchingVectorElement_match< SPTy > m_ContainsMatchingVectorElement(const SPTy &SubPattern)
Match a vector constant where at least one of its elements matches the subpattern.
Definition PatternMatch.h:200

llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition PatternMatch.h:2031

llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition PatternMatch.h:1945

llvm::PatternMatch::m_ConstantInt
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition PatternMatch.h:179

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:139

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:440

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739

llvm::getLoadStoreAddressSpace
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Definition Instructions.h:5340

llvm::getVectorIntrinsicIDForCall
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition VectorUtils.cpp:238

llvm::canInstructionHaveMMRAs
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
Definition MemoryModelRelaxationAnnotations.cpp:181

llvm::possiblyDemandedEltsInMask
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
Definition VectorUtils.cpp:1275

llvm::Depth
@ Depth
Definition SIMachineScheduler.h:36

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition Instructions.h:5294

llvm::createUnaryMask
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition VectorUtils.cpp:1191

llvm::getMetadataToPropagate
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
Definition VectorUtils.cpp:1058

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition iterator_range.h:67

llvm::bit_width
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325

llvm::concatenateVectors
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition VectorUtils.cpp:1233

llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Definition Instructions.h:5320

llvm::widenShuffleMaskElts
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition VectorUtils.cpp:541

llvm::propagateMetadata
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition VectorUtils.cpp:1081

llvm::getSplatValue
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition VectorUtils.cpp:392

llvm::bit_ceil
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362

llvm::equal_to
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173

llvm::intersectAccessGroups
LLVM_ABI MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Definition VectorUtils.cpp:1010

llvm::Value
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143

llvm::M1
unsigned M1(unsigned Val)
Definition VE.h:377

llvm::WinX64EHUnwindMode::V1
@ V1
Definition CodeGen.h:171

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746

llvm::getShuffleDemandedElts
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
Definition VectorUtils.cpp:453

llvm::isSplatValue
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition VectorUtils.cpp:407

llvm::createBitMaskForGaps
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition VectorUtils.cpp:1130

llvm::MaxAnalysisRecursionDepth
constexpr unsigned MaxAnalysisRecursionDepth
Definition ValueTracking.h:47

llvm::createStrideMask
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition VectorUtils.cpp:1170

llvm::getHorizDemandedEltsForFirstOperand
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
Definition VectorUtils.cpp:781

llvm::createReplicatedMask
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition VectorUtils.cpp:1150

llvm::ComplexDeinterleavingOperation::Splat
@ Splat
Definition ComplexDeinterleavingPass.h:42

llvm::getDeinterleaveIntrinsicFactor
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
Definition VectorUtils.cpp:273

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209

llvm::getInterleaveIntrinsicFactor
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
Definition VectorUtils.cpp:252

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::PoisonMaskElem
constexpr int PoisonMaskElem
Definition Instructions.h:1943

llvm::isTriviallyScalarizable
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially scalarizable.
Definition VectorUtils.cpp:132

llvm::isValidAsAccessGroup
LLVM_ABI bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Definition LoopInfo.cpp:1193

llvm::getIntrinsicForCallSite
LLVM_ABI Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
Definition ValueTracking.cpp:4643

llvm::isVectorIntrinsicWithStructReturnOverloadAtField
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
Definition VectorUtils.cpp:221

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:263

llvm::narrowShuffleMaskElts
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition VectorUtils.cpp:520

llvm::isMaskedSlidePair
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
Definition VectorUtils.cpp:490

llvm::getDeinterleavedVectorType
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
Definition VectorUtils.cpp:294

llvm::createInterleaveMask
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition VectorUtils.cpp:1159

llvm::isVectorIntrinsicWithScalarOpAtArg
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition VectorUtils.cpp:140

llvm::replaceSymbolicStrideSCEV
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition LoopAccessAnalysis.cpp:155

llvm::findScalarElement
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition VectorUtils.cpp:306

llvm::uniteAccessGroups
LLVM_ABI MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition VectorUtils.cpp:989

llvm::M0
unsigned M0(unsigned Val)
Definition VE.h:376

llvm::make_second_range
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
Definition STLExtras.h:1409

llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772

llvm::getShuffleMaskWithWidestElts
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
Definition VectorUtils.cpp:652

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947

llvm::getLoadStoreType
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
Definition Instructions.h:5349

llvm::processShuffleMasks
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
Definition VectorUtils.cpp:666

llvm::all_equal
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition STLExtras.h:2166

llvm::maskContainsAllOneOrUndef
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if any of the elements of this predicate mask are known to be ...
Definition VectorUtils.cpp:1261

llvm::seq
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305

llvm::isTriviallyVectorizable
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition VectorUtils.cpp:46

llvm::createSequentialMask
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition VectorUtils.cpp:1178

llvm::getPtrStride
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool ShouldCheckWrap=true, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
If the pointer has a constant stride return it in units of the access type size.
Definition LoopAccessAnalysis.cpp:1659

llvm::isVectorIntrinsicWithOverloadTypeAtArg
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Definition VectorUtils.cpp:181

llvm::computeMinimumValueSizes
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition VectorUtils.cpp:810

llvm::scaleShuffleMaskElts
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
Definition VectorUtils.cpp:627

llvm::getSplatIndex
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition VectorUtils.cpp:370

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862

llvm::PatternMatch::m_ZeroMask
Definition PatternMatch.h:1984

llvm::cl::desc
Definition CommandLine.h:406