doxygen/LoopVectorizationPlanner_8cpp_source.html

//===- LoopVectorizationPlanner.cpp - VF selection and planning -----------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// This file implements VFSelectionContext methods for loop vectorization

/// VF selection, independent of cost-modeling decisions.

///

//===----------------------------------------------------------------------===//


#include "LoopVectorizationPlanner.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/IR/DiagnosticInfo.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/MathExtras.h"

#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"

#include "llvm/Transforms/Vectorize/LoopVectorize.h"


using namespace llvm;


#define DEBUG_TYPE "loop-vectorize"


// Declaration only; the definition is not in the visible part of this file.
// Read by VFSelectionContext::computeVPlanOuterloopVF(), which replaces a
// computed scalar VF with a fixed VF of 4 when this option is set.
extern cl::opt<bool> VPlanBuildOuterloopStressTest;


// Hidden option, default false. Read by VFSelectionContext::useMaxBandwidth():
// a true value forces max-bandwidth VF selection, and any explicit occurrence
// on the command line disables the target / vector-call-variant heuristics
// that would otherwise be consulted.
static cl::opt<bool> MaximizeBandwidth(

    "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,

    cl::desc("Maximize bandwidth when selecting vectorization factor which "

             "will be determined by the smallest type in loop."));


// Hidden option, default true. In VFSelectionContext::useMaxBandwidth() it is
// combined with Legal->hasVectorCallVariants(), and only takes effect when
// -vectorizer-maximize-bandwidth was not given on the command line.
static cl::opt<bool> UseWiderVFIfCallVariantsPresent(

    "vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true),

    cl::Hidden,

    cl::desc("Try wider VFs if they enable the use of vector variants"));


// Hidden option, default false. When it appears on the command line,
// VFSelectionContext::shouldConsiderRegPressureForVF() returns its value
// directly and skips the target-based decision.
static cl::opt<bool> ConsiderRegPressure(

    "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,

    cl::desc("Discard VFs if their register pressure is too high."));


// Hidden testing option, default false. OR-ed with
// TTI.supportsScalableVectors() in
// VFSelectionContext::supportsScalableVectors().
static cl::opt<bool> ForceTargetSupportsScalableVectors(

    "force-target-supports-scalable-vectors", cl::init(false), cl::Hidden,

    cl::desc(

        "Pretend that scalable vectors are supported, even if the target does "

        "not support them. This flag should only be used for testing."));


// Definition of the llvm::PreferInLoopReductions option (hidden, default
// false). In this file it is read by collectElementTypesForWidening() and
// collectInLoopReductions(), where a true value has the same effect as the
// target preferring an in-loop reduction.
cl::opt<bool> llvm::PreferInLoopReductions(

    "prefer-inloop-reductions", cl::init(false), cl::Hidden,

    cl::desc("Prefer in-loop vector reductions, "

             "overriding the targets preference."));


/// Hidden testing option, default false. When set,
/// VFSelectionContext::isLegalMaskedLoadOrStore() returns true without
/// consulting the target.
///
/// Note: This currently only applies to `llvm.masked.load` and

/// `llvm.masked.store`. TODO: Extend this to cover other operations as needed.

static cl::opt<bool> ForceTargetSupportsMaskedMemoryOps(

    "force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden,

    cl::desc("Assume the target supports masked memory operations (used for "

             "testing)."));


// Hidden testing option, default false. When set,
// VFSelectionContext::isLegalGatherOrScatter() returns true for any load or
// store without consulting the target.
static cl::opt<bool> ForceTargetSupportsGatherScatterOps(

    "force-target-supports-gather-scatter-ops", cl::init(false), cl::Hidden,

    cl::desc("Assume the target supports gather/scatter operations (used for "

             "testing)."));


/// Returns true if a masked form of the load or store \p I is legal.
///
/// The answer is forced to true by -force-target-supports-masked-memory-ops;
/// otherwise the target is asked, using the accessed type, alignment and
/// address space of \p I. \p I must be a LoadInst or a StoreInst.
/// \p VF is not consulted by this implementation.
bool VFSelectionContext::isLegalMaskedLoadOrStore(Instruction *I,
                                                  ElementCount VF) const {
  assert(isa<LoadInst>(I) || isa<StoreInst>(I));
  auto *AccessTy = getLoadStoreType(I);
  const unsigned AddrSpace = getLoadStoreAddressSpace(I);
  const Align Alignment = getLoadStoreAlignment(I);

  // The testing override wins over whatever the target reports.
  if (ForceTargetSupportsMaskedMemoryOps)
    return true;

  if (isa<LoadInst>(I))
    return TTI.isLegalMaskedLoad(AccessTy, Alignment, AddrSpace);
  return TTI.isLegalMaskedStore(AccessTy, Alignment, AddrSpace);
}


/// Returns true if \p V is a load (resp. store) that may be turned into a
/// masked gather (resp. scatter) at vectorization factor \p VF.
///
/// Values that are neither loads nor stores are never legal. For loads and
/// stores, -force-target-supports-gather-scatter-ops forces the answer to
/// true; otherwise the target is queried with the accessed type, widened to a
/// vector of \p VF elements when \p VF is a vector factor.
bool VFSelectionContext::isLegalGatherOrScatter(Value *V,
                                                ElementCount VF) const {
  const bool IsLoad = isa<LoadInst>(V);
  const bool IsStore = isa<StoreInst>(V);
  if (!(IsLoad || IsStore))
    return false;

  auto *AccessTy = getLoadStoreType(V);
  const Align Alignment = getLoadStoreAlignment(V);
  if (VF.isVector())
    AccessTy = VectorType::get(AccessTy, VF);

  if (ForceTargetSupportsGatherScatterOps)
    return true;

  // Exactly one of IsLoad / IsStore holds at this point.
  if (IsLoad)
    return TTI.isLegalMaskedGather(AccessTy, Alignment);
  return TTI.isLegalMaskedScatter(AccessTy, Alignment);
}


/// Returns true if the target reports scalable-vector support, or if that
/// support is being simulated via -force-target-supports-scalable-vectors.
bool VFSelectionContext::supportsScalableVectors() const {
  if (TTI.supportsScalableVectors())
    return true;
  return ForceTargetSupportsScalableVectors;
}


bool VFSelectionContext::useMaxBandwidth(bool IsScalable) const {

  TargetTransformInfo::RegisterKind RegKind =

      IsScalable ? TargetTransformInfo::RGK_ScalableVector

                 : TargetTransformInfo::RGK_FixedWidthVector;

  return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&

                               (TTI.shouldMaximizeVectorBandwidth(RegKind) ||

                                (UseWiderVFIfCallVariantsPresent &&

                                 Legal->hasVectorCallVariants())));

}


/// Returns true if register pressure should be taken into account for \p VF.
///
/// An explicit -vectorizer-consider-reg-pressure decides the result. Otherwise
/// the target hook may enable it for every VF; failing that, it is enabled
/// only for VFs larger than the maximum recorded in
/// MaxPermissibleVFWithoutMaxBW, and only when max-bandwidth selection is in
/// use for the kind of \p VF.
bool VFSelectionContext::shouldConsiderRegPressureForVF(ElementCount VF) const {
  if (ConsiderRegPressure.getNumOccurrences() != 0)
    return ConsiderRegPressure;

  // TODO: We should eventually consider register pressure for all targets. The
  // TTI hook is temporary whilst target-specific issues are being fixed.
  if (TTI.shouldConsiderVectorizationRegPressure())
    return true;

  const bool Scalable = VF.isScalable();
  if (!useMaxBandwidth(Scalable))
    return false;

  // Only calculate register pressure for VFs enabled by MaxBandwidth.
  const ElementCount LargestWithoutMaxBW =
      Scalable ? MaxPermissibleVFWithoutMaxBW.ScalableVF
               : MaxPermissibleVFWithoutMaxBW.FixedVF;
  return ElementCount::isKnownGT(VF, LargestWithoutMaxBW);
}


/// Clamps \p VF so that the vector loop is not dead for a loop with at most
/// \p MaxTripCount iterations.
///
/// \p VF is returned unchanged unless the (adjusted) \p MaxTripCount is
/// non-zero, does not exceed the estimated number of lanes times the
/// interleave count, and - when folding the tail by masking - is a power of
/// two. In that case the result is the largest power of two not exceeding
/// MaxTripCount / IC (at least 1); it is scalable only if \p VF is scalable
/// and \p FoldTailByMasking is set. A \p UserIC of 0 is treated as 1.
ElementCount VFSelectionContext::clampVFByMaxTripCount(
    ElementCount VF, unsigned MaxTripCount, unsigned UserIC,
    bool FoldTailByMasking, bool RequiresScalarEpilogue) const {
  // For scalable VFs, scale the known minimum by the minimum vscale promised
  // by the function's vscale_range attribute, if there is one.
  unsigned EstimatedLanes = VF.getKnownMinValue();
  if (VF.isScalable() && F.hasFnAttribute(Attribute::VScaleRange))
    EstimatedLanes *=
        F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMin();

  // When a scalar epilogue is required, at least one iteration of the scalar
  // loop has to execute. Adjust MaxTripCount accordingly to avoid picking a
  // max VF that results in a dead vector loop.
  if (RequiresScalarEpilogue && MaxTripCount > 0)
    --MaxTripCount;

  // When the user specifies an interleave count, we need to ensure that
  // VF * UserIC <= MaxTripCount to avoid a dead vector loop.
  const unsigned IC = UserIC == 0 ? 1 : UserIC;
  const unsigned EstimatedLanesTimesIC = EstimatedLanes * IC;

  // Nothing to clamp if the trip count is unknown or larger than one
  // interleaved vector iteration.
  if (MaxTripCount == 0 || MaxTripCount > EstimatedLanesTimesIC)
    return VF;
  if (FoldTailByMasking && !isPowerOf2_32(MaxTripCount))
    return VF;

  // If upper bound loop trip count (TC) is known at compile time there is no
  // point in choosing VF greater than TC / IC. Select maximum power of two
  // which doesn't exceed TC / IC. If VF is scalable, we only fall back on a
  // fixed VF when the TC is less than or equal to the known number of lanes.
  unsigned PowerOf2Bound = llvm::bit_floor(MaxTripCount / IC);
  if (!PowerOf2Bound)
    PowerOf2Bound = 1;
  LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
                       "exceeding the constant trip count"
                    << (UserIC > 0 ? " divided by UserIC" : "") << ": "
                    << PowerOf2Bound << "\n");
  return ElementCount::get(PowerOf2Bound,
                           FoldTailByMasking && VF.isScalable());
}


/// Computes the largest VF of the same kind (fixed or scalable) as
/// \p MaxSafeVF that the target's widest register allows, bounded by
/// \p MaxSafeVF and clamped by the maximum trip count.
///
/// Returns a fixed VF of 1 if no element of \p WidestType fits in a register
/// of that kind (or \p MaxSafeVF is zero). When trip-count clamping does not
/// change the register-based bound, that bound is recorded in
/// MaxPermissibleVFWithoutMaxBW, and - if max-bandwidth selection is enabled -
/// the bound is recomputed from \p SmallestType, raised to the target's
/// minimum VF if needed, and clamped by the trip count again.
ElementCount VFSelectionContext::getMaximizedVFForTarget(
    unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
    ElementCount MaxSafeVF, unsigned UserIC, bool FoldTailByMasking,
    bool RequiresScalarEpilogue) {
  const bool WantScalable = MaxSafeVF.isScalable();
  const TypeSize WidestRegister = TTI.getRegisterBitWidth(
      WantScalable ? TargetTransformInfo::RGK_ScalableVector
                   : TargetTransformInfo::RGK_FixedWidthVector);
  const auto RegisterBits = WidestRegister.getKnownMinValue();

  // Convenience function to return the minimum of two ElementCounts.
  auto SmallerOf = [](const ElementCount &LHS, const ElementCount &RHS) {
    assert((LHS.isScalable() == RHS.isScalable()) &&
           "Scalable flags must match");
    return ElementCount::isKnownLT(LHS, RHS) ? LHS : RHS;
  };

  // Ensure MaxVF is a power of 2; the dependence distance bound may not be.
  // Note that both WidestRegister and WidestType may not be a powers of 2.
  ElementCount MaxVectorElementCount = SmallerOf(
      ElementCount::get(llvm::bit_floor(RegisterBits / WidestType),
                        WantScalable),
      MaxSafeVF);
  LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
                    << (MaxVectorElementCount * WidestType) << " bits.\n");

  if (MaxVectorElementCount.isZero()) {
    LLVM_DEBUG(dbgs() << "LV: The target has no "
                      << (WantScalable ? "scalable" : "fixed")
                      << " vector registers.\n");
    return ElementCount::getFixed(1);
  }

  ElementCount MaxVF =
      clampVFByMaxTripCount(MaxVectorElementCount, MaxTripCount, UserIC,
                            FoldTailByMasking, RequiresScalarEpilogue);
  // If the MaxVF was already clamped, there's no point in trying to pick a
  // larger one.
  if (MaxVF != MaxVectorElementCount)
    return MaxVF;

  // Remember the bound that holds without max-bandwidth selection.
  ElementCount &RecordedMax = MaxVF.isScalable()
                                  ? MaxPermissibleVFWithoutMaxBW.ScalableVF
                                  : MaxPermissibleVFWithoutMaxBW.FixedVF;
  RecordedMax = MaxVF;

  if (!useMaxBandwidth(WantScalable))
    return MaxVF;

  // With max-bandwidth selection the smallest type determines the lane count.
  MaxVF = SmallerOf(
      ElementCount::get(llvm::bit_floor(RegisterBits / SmallestType),
                        WantScalable),
      MaxSafeVF);

  // A zero target minimum means the target imposes no minimum.
  const ElementCount TargetMinVF =
      TTI.getMinimumVF(SmallestType, WantScalable);
  if (TargetMinVF && ElementCount::isKnownLT(MaxVF, TargetMinVF)) {
    LLVM_DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF
                      << ") with target's minimum: " << TargetMinVF << '\n');
    MaxVF = TargetMinVF;
  }

  return clampVFByMaxTripCount(MaxVF, MaxTripCount, UserIC, FoldTailByMasking,
                               RequiresScalarEpilogue);
}


/// Returns the maximum vscale for \p F: the target's value if \p TTI provides
/// one, otherwise the maximum of the function's vscale_range attribute if that
/// attribute is present, otherwise std::nullopt.
std::optional<unsigned> llvm::getMaxVScale(const Function &F,
                                           const TargetTransformInfo &TTI) {
  std::optional<unsigned> TargetMax = TTI.getMaxVScale();
  if (TargetMax)
    return TargetMax;

  if (!F.hasFnAttribute(Attribute::VScaleRange))
    return std::nullopt;

  return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
}


/// Returns true if scalable vectorization may be used for the loop. The result
/// is computed once and cached in IsScalableVectorizationAllowed.
///
/// Scalable vectorization is rejected if scalable vectors are unsupported, if
/// the loop hints disable it, if any reduction or element type in the loop is
/// not legal for scalable vectors, or if the loop is not safe for any vector
/// width and no maximum vscale is known. Every rejection other than missing
/// scalable-vector support is reported via reportVectorizationInfo.
bool VFSelectionContext::isScalableVectorizationAllowed() {
  if (IsScalableVectorizationAllowed)
    return *IsScalableVectorizationAllowed;

  // Assume "not allowed" until every check below has passed.
  IsScalableVectorizationAllowed = false;
  if (!supportsScalableVectors())
    return false;

  if (Hints->isScalableVectorizationDisabled()) {
    reportVectorizationInfo("Scalable vectorization is explicitly disabled",
                            "ScalableVectorizationDisabled", ORE, TheLoop);
    return false;
  }

  LLVM_DEBUG(dbgs() << "LV: Scalable vectorization is available\n");

  // Reports why scalable VFs are unfeasible and yields the negative result.
  auto Unfeasible = [&](const char *Reason) {
    reportVectorizationInfo(Reason, "ScalableVFUnfeasible", ORE, TheLoop);
    return false;
  };

  auto MaxScalableVF = ElementCount::getScalable(
      std::numeric_limits<ElementCount::ScalarTy>::max());

  // Test that the loop-vectorizer can legalize all operations for this MaxVF.
  // FIXME: While for scalable vectors this is currently sufficient, this should
  // be replaced by a more detailed mechanism that filters out specific VFs,
  // instead of invalidating vectorization for a whole set of VFs based on the
  // MaxVF.

  // Disable scalable vectorization if the loop contains unsupported reductions.
  auto IsUnsupportedReduction = [&](const auto &Reduction) -> bool {
    return !TTI.isLegalToVectorizeReduction(Reduction.second, MaxScalableVF);
  };
  if (any_of(Legal->getReductionVars(), IsUnsupportedReduction))
    return Unfeasible("Scalable vectorization not supported for the reduction "
                      "operations found in this loop.");

  // Disable scalable vectorization if the loop contains any instructions
  // with element types not supported for scalable vectors.
  auto IsAcceptableElementType = [&](Type *Ty) {
    return Ty->isVoidTy() || TTI.isElementTypeLegalForScalableVector(Ty);
  };
  if (!all_of(ElementTypesInLoop, IsAcceptableElementType))
    return Unfeasible("Scalable vectorization is not supported "
                      "for all element types found in this loop.");

  // A bounded dependence distance can only be honoured if vscale is bounded.
  if (!Legal->isSafeForAnyVectorWidth() && !getMaxVScale(F, TTI))
    return Unfeasible("The target does not provide maximum vscale value "
                      "for safe distance analysis.");

  IsScalableVectorizationAllowed = true;
  return true;
}


/// Returns the largest scalable VF that is legal for the loop, given that at
/// most \p MaxSafeElements elements may safely be processed per vector
/// iteration.
///
/// Returns a scalable VF of 0 if scalable vectorization is not allowed, and
/// the maximum representable scalable VF if the loop is safe for any vector
/// width. Otherwise the result is \p MaxSafeElements divided by the maximum
/// vscale; if that is zero, the fact is reported via reportVectorizationInfo.
ElementCount
VFSelectionContext::getMaxLegalScalableVF(unsigned MaxSafeElements) {
  if (!isScalableVectorizationAllowed())
    return ElementCount::getScalable(0);

  auto MaxScalableVF = ElementCount::getScalable(
      std::numeric_limits<ElementCount::ScalarTy>::max());
  if (Legal->isSafeForAnyVectorWidth())
    return MaxScalableVF;

  std::optional<unsigned> MaxVScale = getMaxVScale(F, TTI);
  // isScalableVectorizationAllowed() rejects loops that are not safe for any
  // vector width when no maximum vscale is available, so a value must exist
  // here. It is also used as a divisor below, so it must be non-zero.
  assert(MaxVScale && *MaxVScale != 0 &&
         "Expected a known, non-zero maximum vscale");
  // Limit MaxScalableVF by the maximum safe dependence distance.
  MaxScalableVF = ElementCount::getScalable(MaxSafeElements / *MaxVScale);

  if (!MaxScalableVF)
    reportVectorizationInfo(
        "Max legal vector width too small, scalable vectorization "
        "unfeasible.",
        "ScalableVFUnfeasible", ORE, TheLoop);

  return MaxScalableVF;
}


/// Computes the maximum feasible fixed and scalable VFs for the loop.
///
/// The safe element count is derived from the legality analysis (maximum safe
/// vector width and, if restricted, the store-load forwarding distance) and
/// the widest type in the loop. A non-zero \p UserVF is honoured if it is
/// known to be within the safe bound; an unsafe fixed \p UserVF is clamped to
/// the safe fixed VF; an unsafe or unsupported scalable \p UserVF is ignored
/// with a remark. Without a usable \p UserVF, the result comes from
/// getMaximizedVFForTarget() for the fixed and the scalable kind, starting
/// from a fixed VF of 1 and a scalable VF of 0.
FixedScalableVFPair VFSelectionContext::computeFeasibleMaxVF(
    unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC,
    bool FoldTailByMasking, bool RequiresScalarEpilogue) {
  auto [SmallestType, WidestType] = getSmallestAndWidestTypes();

  // Get the maximum safe dependence distance in bits computed by LAA.
  // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
  // the memory accesses that is most restrictive (involved in the smallest
  // dependence distance).
  unsigned MaxSafeElementsPowerOf2 =
      llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
  if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
    const unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
    const unsigned SLElements = SLDist / WidestType;
    if (SLElements < MaxSafeElementsPowerOf2)
      MaxSafeElementsPowerOf2 = SLElements;
  }

  const ElementCount MaxSafeFixedVF =
      ElementCount::getFixed(MaxSafeElementsPowerOf2);
  const ElementCount MaxSafeScalableVF =
      getMaxLegalScalableVF(MaxSafeElementsPowerOf2);

  if (!Legal->isSafeForAnyVectorWidth())
    MaxSafeElements = MaxSafeElementsPowerOf2;

  LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
                    << ".\n");
  LLVM_DEBUG(dbgs() << "LV: The max safe scalable VF is: " << MaxSafeScalableVF
                    << ".\n");

  // First analyze the UserVF, fall back if the UserVF should be ignored.
  if (UserVF) {
    const bool UserWantsScalable = UserVF.isScalable();
    const ElementCount MaxSafeUserVF =
        UserWantsScalable ? MaxSafeScalableVF : MaxSafeFixedVF;

    if (ElementCount::isKnownLE(UserVF, MaxSafeUserVF)) {
      if (!UserWantsScalable)
        return UserVF;

      // If `VF=vscale x N` is safe, then so is `VF=N`
      return FixedScalableVFPair(
          ElementCount::getFixed(UserVF.getKnownMinValue()), UserVF);
    }

    assert(ElementCount::isKnownGT(UserVF, MaxSafeUserVF));

    // Only clamp if the UserVF is not scalable. If the UserVF is scalable, it
    // is better to ignore the hint and let the compiler choose a suitable VF.
    if (!UserWantsScalable) {
      LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
                        << " is unsafe, clamping to max safe VF="
                        << MaxSafeFixedVF << ".\n");
      ORE->emit([&]() {
        return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
                                          TheLoop->getStartLoc(),
                                          TheLoop->getHeader())
               << "User-specified vectorization factor "
               << ore::NV("UserVectorizationFactor", UserVF)
               << " is unsafe, clamping to maximum safe vectorization factor "
               << ore::NV("VectorizationFactor", MaxSafeFixedVF);
      });
      return MaxSafeFixedVF;
    }

    // The scalable hint cannot be honoured: explain why, then fall through to
    // the target-driven computation below.
    if (supportsScalableVectors()) {
      LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
                        << " is unsafe. Ignoring scalable UserVF.\n");
      ORE->emit([&]() {
        return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
                                          TheLoop->getStartLoc(),
                                          TheLoop->getHeader())
               << "User-specified vectorization factor "
               << ore::NV("UserVectorizationFactor", UserVF)
               << " is unsafe. Ignoring the hint to let the compiler pick a "
                  "more suitable value.";
      });
    } else {
      LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
                        << " is ignored because scalable vectors are not "
                           "available.\n");
      ORE->emit([&]() {
        return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
                                          TheLoop->getStartLoc(),
                                          TheLoop->getHeader())
               << "User-specified vectorization factor "
               << ore::NV("UserVectorizationFactor", UserVF)
               << " is ignored because the target does not support scalable "
                  "vectors. The compiler will pick a more suitable value.";
      });
    }
  }

  LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
                    << " / " << WidestType << " bits.\n");

  FixedScalableVFPair Result(ElementCount::getFixed(1),
                             ElementCount::getScalable(0));

  // Fixed-width candidate; a zero result leaves the scalar default in place.
  const ElementCount FixedCandidate = getMaximizedVFForTarget(
      MaxTripCount, SmallestType, WidestType, MaxSafeFixedVF, UserIC,
      FoldTailByMasking, RequiresScalarEpilogue);
  if (FixedCandidate)
    Result.FixedVF = FixedCandidate;

  // Scalable candidate; only accepted if the result is still scalable.
  const ElementCount ScalableCandidate = getMaximizedVFForTarget(
      MaxTripCount, SmallestType, WidestType, MaxSafeScalableVF, UserIC,
      FoldTailByMasking, RequiresScalarEpilogue);
  if (ScalableCandidate && ScalableCandidate.isScalable()) {
    Result.ScalableVF = ScalableCandidate;
    LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = "
                      << ScalableCandidate << "\n");
  }

  return Result;
}


std::pair<unsigned, unsigned>


VFSelectionContext::getSmallestAndWidestTypes() const {

  unsigned MinWidth = -1U;

  unsigned MaxWidth = 8;

  const DataLayout &DL = F.getDataLayout();

  // For in-loop reductions, no element types are added to ElementTypesInLoop

  // if there are no loads/stores in the loop. In this case, check through the

  // reduction variables to determine the maximum width.

  if (ElementTypesInLoop.empty() && !Legal->getReductionVars().empty()) {

    for (const auto &[_, RdxDesc] : Legal->getReductionVars()) {

      // When finding the min width used by the recurrence we need to account

      // for casts on the input operands of the recurrence.

      MinWidth = std::min(

          MinWidth,

          std::min(RdxDesc.getMinWidthCastToRecurrenceTypeInBits(),

                   RdxDesc.getRecurrenceType()->getScalarSizeInBits()));

      MaxWidth = std::max(MaxWidth,

                          RdxDesc.getRecurrenceType()->getScalarSizeInBits());

    }

  } else {

    for (Type *T : ElementTypesInLoop) {

      MinWidth = std::min<unsigned>(

          MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedValue());

      MaxWidth = std::max<unsigned>(

          MaxWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedValue());

    }

  }

  return {MinWidth, MaxWidth};

}


/// Rebuilds ElementTypesInLoop from the loads, stores and reduction PHIs of
/// the loop, skipping any instruction contained in \p ValuesToIgnore (which
/// may be null).
///
/// Loads contribute their result type, stores the type of the stored value,
/// and reduction PHIs their recurrence type - unless the reduction is to be
/// performed in-loop (by option, because it is ordered, or by target
/// preference), in which case the PHI contributes nothing.
void VFSelectionContext::collectElementTypesForWidening(
    const SmallPtrSetImpl<const Value *> *ValuesToIgnore) {
  ElementTypesInLoop.clear();
  for (BasicBlock *BB : TheLoop->blocks()) {
    for (Instruction &I : *BB) {
      // Skip ignored values.
      if (ValuesToIgnore && ValuesToIgnore->contains(&I))
        continue;

      Type *ElemTy = nullptr;
      if (auto *Phi = dyn_cast<PHINode>(&I)) {
        // Only PHI nodes that are reduction variables matter; use the
        // recurrence type rather than the PHI's own type.
        if (!Legal->isReductionVariable(Phi))
          continue;
        const RecurrenceDescriptor &RdxDesc =
            Legal->getRecurrenceDescriptor(Phi);
        const bool IsInLoopReduction =
            PreferInLoopReductions || useOrderedReductions(RdxDesc) ||
            TTI.preferInLoopReduction(RdxDesc.getRecurrenceKind(),
                                      RdxDesc.getRecurrenceType());
        if (IsInLoopReduction)
          continue;
        ElemTy = RdxDesc.getRecurrenceType();
      } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
        // Stores are characterized by the value they store.
        ElemTy = Store->getValueOperand()->getType();
      } else if (isa<LoadInst>(I)) {
        ElemTy = I.getType();
      } else {
        // Only Loads, Stores and PHINodes are examined.
        continue;
      }

      assert(ElemTy->isSized() &&
             "Expected the load/store/recurrence type to be sized");
      ElementTypesInLoop.insert(ElemTy);
    }
  }
}


/// Initializes VScaleForTuning. Does nothing if scalable vectors are not
/// supported. If the function's vscale_range attribute has a known maximum
/// equal to its minimum, that value is used; otherwise the target's tuning
/// value is used.
void VFSelectionContext::initializeVScaleForTuning() {
  if (!supportsScalableVectors())
    return;

  if (F.hasFnAttribute(Attribute::VScaleRange)) {
    auto RangeAttr = F.getFnAttribute(Attribute::VScaleRange);
    const auto RangeMin = RangeAttr.getVScaleRangeMin();
    const auto RangeMax = RangeAttr.getVScaleRangeMax();
    // A range with identical bounds leaves a single possible vscale.
    const bool IsSingleValue = RangeMax && RangeMin == RangeMax;
    if (IsSingleValue) {
      VScaleForTuning = RangeMax;
      return;
    }
  }

  VScaleForTuning = TTI.getVScaleForTuning();
}


/// Returns true if \p RdxDesc is an ordered reduction and the loop hints do
/// not allow reordering.
bool VFSelectionContext::useOrderedReductions(
    const RecurrenceDescriptor &RdxDesc) const {
  if (Hints->allowReordering())
    return false;
  return RdxDesc.isOrdered();
}


/// Returns true - after reporting a vectorization failure - if vectorizing the
/// loop would need runtime pointer checks, runtime SCEV checks (a predicate
/// that is not always true), or specialization on symbolic strides. Returns
/// false if none of these is needed.
bool VFSelectionContext::runtimeChecksRequired() {
  LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n");

  Loop *L = const_cast<Loop *>(TheLoop);

  // All failures below share the same remark tag; report and signal "yes".
  auto ReportRequired = [&](const char *DebugMsg, const char *RemarkMsg) {
    reportVectorizationFailure(DebugMsg, RemarkMsg,
                               "CantVersionLoopWithOptForSize", ORE, L);
    return true;
  };

  if (Legal->getRuntimePointerChecking()->Need)
    return ReportRequired(
        "Runtime ptr check is required with -Os/-Oz",
        "runtime pointer checks needed. Enable vectorization of this "
        "loop with '#pragma clang loop vectorize(enable)' when "
        "compiling with -Os/-Oz");

  if (!PSE.getPredicate().isAlwaysTrue())
    return ReportRequired(
        "Runtime SCEV check is required with -Os/-Oz",
        "runtime SCEV checks needed. Enable vectorization of this "
        "loop with '#pragma clang loop vectorize(enable)' when "
        "compiling with -Os/-Oz");

  // FIXME: Avoid specializing for stride==1 instead of bailing out.
  if (!Legal->getLAI()->getSymbolicStrides().empty())
    return ReportRequired(
        "Runtime stride check for small trip count",
        "runtime stride == 1 checks needed. Enable vectorization of "
        "this loop without such check by compiling with -Os/-Oz");

  return false;
}


/// Stores in MinBWs the result of computeMinimumValueSizes() for the blocks of
/// the loop.
void VFSelectionContext::computeMinimalBitwidths() {
  const auto &LoopBlocks = TheLoop->getBlocks();
  MinBWs = computeMinimumValueSizes(LoopBlocks, *DB, &TTI);
}


void VFSelectionContext::collectInLoopReductions() {

  // Avoid duplicating work finding in-loop reductions.

  if (!InLoopReductions.empty())

    return;


  for (const auto &Reduction : Legal->getReductionVars()) {

    PHINode *Phi = Reduction.first;

    const RecurrenceDescriptor &RdxDesc = Reduction.second;


    // Multi-use reductions (e.g., used in FindLastIV patterns) are handled

    // separately and should not be considered for in-loop reductions.

    if (RdxDesc.hasUsesOutsideReductionChain())

      continue;


    // We don't collect reductions that are type promoted (yet).

    if (RdxDesc.getRecurrenceType() != Phi->getType())

      continue;


    // In-loop AnyOf and FindIV reductions are not yet supported.

    RecurKind Kind = RdxDesc.getRecurrenceKind();

    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) ||

        RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) ||

        RecurrenceDescriptor::isFindLastRecurrenceKind(Kind))

      continue;


    // If the target would prefer this reduction to happen "in-loop", then we

    // want to record it as such.

    if (!PreferInLoopReductions && !useOrderedReductions(RdxDesc) &&

        !TTI.preferInLoopReduction(Kind, Phi->getType()))

      continue;


    // Check that we can correctly put the reductions into the loop, by

    // finding the chain of operations that leads from the phi to the loop

    // exit value.

    SmallVector<Instruction *, 4> ReductionOperations =

        RdxDesc.getReductionOpChain(Phi, const_cast<Loop *>(TheLoop));

    bool InLoop = !ReductionOperations.empty();


    if (InLoop) {

      InLoopReductions.insert(Phi);

      // Add the elements to InLoopReductionImmediateChains for cost modelling.

      Instruction *LastChain = Phi;

      for (auto *I : ReductionOperations) {

        InLoopReductionImmediateChains[I] = LastChain;

        LastChain = I;

      }

    }

    LLVM_DEBUG(dbgs() << "LV: Using " << (InLoop ? "inloop" : "out of loop")

                      << " reduction for phi: " << *Phi << "\n");

  }

}


// TODO: we could return a pair of values that specify the max VF and
// min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
// `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment
// doesn't have a cost model that can choose which plan to execute if
// more than one is generated.

/// Selects the VF used to build VPlans for outer-loop vectorization.
///
/// A non-zero \p UserVF is used as is, except that a scalable \p UserVF on a
/// target without scalable-vector support is reported as a failure. A zero
/// \p UserVF means "compute one": the VF is the number of widest-type elements
/// that fit in the target's vector register (scalable if the target enables
/// scalable vectorization), and is overridden with a fixed VF of 4 under
/// VPlan outer-loop stress testing when the computed VF is not greater than 1.
///
/// \returns FixedScalableVFPair::getNone() if no vector VF could be selected,
/// otherwise a pair holding the selected VF.
FixedScalableVFPair
VFSelectionContext::computeVPlanOuterloopVF(ElementCount UserVF) {
  if (UserVF.isScalable() && !supportsScalableVectors()) {
    reportVectorizationFailure(
        "Scalable vectorization requested but not supported by the target",
        "the scalable user-specified vectorization width for outer-loop "
        "vectorization cannot be used because the target does not support "
        "scalable vectors.",
        "ScalableVFUnfeasible", ORE, TheLoop);
    return FixedScalableVFPair::getNone();
  }

  ElementCount VF = UserVF;
  if (VF.isZero()) {
    auto [_, WidestType] = getSmallestAndWidestTypes();

    auto RegKind = TTI.enableScalableVectorization()
                       ? TargetTransformInfo::RGK_ScalableVector
                       : TargetTransformInfo::RGK_FixedWidthVector;

    TypeSize RegSize = TTI.getRegisterBitWidth(RegKind);
    unsigned N = RegSize.getKnownMinValue() / WidestType;
    VF = ElementCount::get(N, RegSize.isScalable());
    LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");

    // Make sure we have a VF > 1 for stress testing. A computed VF of zero
    // (register narrower than the widest type) needs the override just as
    // much as a scalar VF does.
    if (VPlanBuildOuterloopStressTest && (VF.isScalar() || VF.isZero())) {
      LLVM_DEBUG(dbgs() << "LV: VPlan stress testing: "
                        << "overriding computed VF.\n");
      VF = ElementCount::getFixed(4);
    }

    // No element of the widest type fits into a vector register: there is
    // nothing to vectorize with. Bail out before the power-of-two check
    // below, which zero would fail.
    if (VF.isZero())
      return FixedScalableVFPair::getNone();
  }
  assert(isPowerOf2_32(VF.getKnownMinValue()) &&
         "VF needs to be a power of two");
  if (VF.isScalar())
    return FixedScalableVFPair::getNone();
  LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "")
                    << "VF " << VF << " to build VPlans.\n");
  return FixedScalableVFPair(VF);
}


RegSize
unsigned RegSize
Definition AArch64MIPeepholeOpt.cpp:174

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

CommandLine.h

DiagnosticInfo.h

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

_
#define _
Definition HexagonMCCodeEmitter.cpp:46

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

LoopInfo.h

Reduction
loop Loop Strength Reduction
Definition LoopStrengthReduce.cpp:7161

LoopVectorizationLegality.h
This file defines the LoopVectorizationLegality class.

ForceTargetSupportsGatherScatterOps
static cl::opt< bool > ForceTargetSupportsGatherScatterOps("force-target-supports-gather-scatter-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports gather/scatter operations (used for " "testing)."))

VPlanBuildOuterloopStressTest
cl::opt< bool > VPlanBuildOuterloopStressTest

ForceTargetSupportsScalableVectors
static cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))

ConsiderRegPressure
static cl::opt< bool > ConsiderRegPressure("vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, cl::desc("Discard VFs if their register pressure is too high."))

UseWiderVFIfCallVariantsPresent
static cl::opt< bool > UseWiderVFIfCallVariantsPresent("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))

ForceTargetSupportsMaskedMemoryOps
static cl::opt< bool > ForceTargetSupportsMaskedMemoryOps("force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports masked memory operations (used for " "testing)."))
Note: This currently only applies to llvm.masked.load and llvm.masked.store.

MaximizeBandwidth
static cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))

LoopVectorizationPlanner.h
This file provides a LoopVectorizationPlanner class.

LoopVectorize.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MathExtras.h

T
#define T
Definition Mips16ISelLowering.cpp:282

OptimizationRemarkEmitter.h

ScalarEvolution.h

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:119

RHS
Value * RHS
Definition X86PartialReduction.cpp:81

LHS
Value * LHS
Definition X86PartialReduction.cpp:80

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64

llvm::ElementCount
Definition TypeSize.h:298

llvm::ElementCount::isVector
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324

llvm::ElementCount::getScalable
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312

llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309

llvm::ElementCount::get
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315

llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320

llvm::Function
Definition Function.h:65

llvm::Instruction
Definition Instruction.h:69

llvm::LoopVectorizeHints::isScalableVectorizationDisabled
bool isScalableVectorizationDisabled() const
Definition LoopVectorizationLegality.h:167

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition DiagnosticInfo.h:857

llvm::PHINode
Definition Instructions.h:2659

llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition IVDescriptors.h:87

llvm::RecurrenceDescriptor::getRecurrenceType
Type * getRecurrenceType() const
Returns the type of the recurrence.
Definition IVDescriptors.h:303

llvm::RecurrenceDescriptor::hasUsesOutsideReductionChain
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
Definition IVDescriptors.h:323

llvm::RecurrenceDescriptor::isFindLastRecurrenceKind
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
Definition IVDescriptors.h:293

llvm::RecurrenceDescriptor::getReductionOpChain
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
Definition IVDescriptors.cpp:1274

llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
Definition IVDescriptors.h:280

llvm::RecurrenceDescriptor::getRecurrenceKind
RecurKind getRecurrenceKind() const
Definition IVDescriptors.h:227

llvm::RecurrenceDescriptor::isOrdered
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
Definition IVDescriptors.h:318

llvm::RecurrenceDescriptor::isFindIVRecurrenceKind
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
Definition IVDescriptors.h:286

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition SmallPtrSet.h:368

llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition SmallPtrSet.h:461

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:86

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:271

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition TargetTransformInfo.h:1350

llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition TargetTransformInfo.h:1350

llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition TargetTransformInfo.h:1350

llvm::TypeSize
Definition TypeSize.h:332

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141

llvm::VFSelectionContext::computeVPlanOuterloopVF
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
Definition LoopVectorizationPlanner.cpp:632

llvm::VFSelectionContext::getSmallestAndWidestTypes
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
Definition LoopVectorizationPlanner.cpp:439

llvm::VFSelectionContext::supportsScalableVectors
bool supportsScalableVectors() const
Definition LoopVectorizationPlanner.cpp:96

llvm::VFSelectionContext::runtimeChecksRequired
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
Definition LoopVectorizationPlanner.cpp:533

llvm::VFSelectionContext::isLegalGatherOrScatter
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
Definition LoopVectorizationPlanner.cpp:81

llvm::VFSelectionContext::collectInLoopReductions
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
Definition LoopVectorizationPlanner.cpp:574

llvm::VFSelectionContext::computeFeasibleMaxVF
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
Definition LoopVectorizationPlanner.cpp:326

llvm::VFSelectionContext::useOrderedReductions
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
Definition LoopVectorizationPlanner.cpp:528

llvm::VFSelectionContext::shouldConsiderRegPressureForVF
bool shouldConsiderRegPressureForVF(ElementCount VF) const
Definition LoopVectorizationPlanner.cpp:110

llvm::VFSelectionContext::collectElementTypesForWidening
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
Definition LoopVectorizationPlanner.cpp:468

llvm::VFSelectionContext::isLegalMaskedLoadOrStore
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
Definition LoopVectorizationPlanner.cpp:69

llvm::VFSelectionContext::computeMinimalBitwidths
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
Definition LoopVectorizationPlanner.cpp:570

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.

llvm::cl::opt
Definition CommandLine.h:1454

llvm::details::FixedOrScalableQuantity< ElementCount, unsigned >::isKnownLE
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230

llvm::details::FixedOrScalableQuantity< ElementCount, unsigned >::isKnownLT
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168

llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165

llvm::details::FixedOrScalableQuantity::isZero
constexpr bool isZero() const
Definition TypeSize.h:153

llvm::details::FixedOrScalableQuantity< ElementCount, unsigned >::isKnownGT
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223

llvm::IRSimilarity::Legal
@ Legal
Definition IRSimilarityIdentifier.h:77

llvm::SI
Definition SIInstrInfo.h:1902

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition OptimizationRemarkEmitter.h:139

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::reportVectorizationInfo
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
Definition LoopVectorize.cpp:757

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738

llvm::getLoadStoreAddressSpace
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Definition Instructions.h:5337

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Definition Instructions.h:5317

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209

llvm::getMaxVScale
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
Definition LoopVectorizationPlanner.cpp:234

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::reportVectorizationFailure
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Definition LoopVectorize.cpp:746

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:266

llvm::RecurKind
RecurKind
These are the kinds of recurrences that we support.
Definition IVDescriptors.h:34

llvm::getLoadStoreType
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
Definition Instructions.h:5346

llvm::bit_floor
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347

llvm::computeMinimumValueSizes
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition VectorUtils.cpp:801

llvm::PreferInLoopReductions
cl::opt< bool > PreferInLoopReductions

N
#define N

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::FixedScalableVFPair
A class that represents two vectorization factors (initialized with 0 by default).
Definition LoopVectorizationPlanner.h:502

llvm::FixedScalableVFPair::getNone
static FixedScalableVFPair getNone()
Definition LoopVectorizationPlanner.h:519

llvm::cl::desc
Definition CommandLine.h:410