doxygen/BasicTTIImpl_8h_source.html

//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// This file provides a helper that implements much of the TTI interface in

/// terms of the target-independent code generator and TargetLowering

/// interfaces.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_CODEGEN_BASICTTIIMPL_H

#define LLVM_CODEGEN_BASICTTIIMPL_H


#include "llvm/ADT/APInt.h"

#include "llvm/ADT/BitVector.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/TargetTransformInfoImpl.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/CodeGen/ISDOpcodes.h"

#include "llvm/CodeGen/TargetLowering.h"

#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/CodeGen/ValueTypes.h"

#include "llvm/CodeGenTypes/MachineValueType.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constant.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/Operator.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Value.h"

#include "llvm/Support/Alignment.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/MathExtras.h"

#include "llvm/Target/TargetMachine.h"

#include "llvm/Target/TargetOptions.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include <algorithm>

#include <cassert>

#include <cstdint>

#include <limits>

#include <optional>

#include <utility>


namespace llvm {


class Function;

class GlobalValue;

class LLVMContext;

class ScalarEvolution;

class SCEV;

class TargetMachine;


extern LLVM_ABI cl::opt<unsigned> PartialUnrollingThreshold;


/// Base class which can be used to help build a TTI implementation.

///

/// This class provides as much implementation of the TTI interface as is

/// possible using the target independent parts of the code generator.

///

/// In order to subclass it, your class must implement a getST() method to

/// return the subtarget, and a getTLI() method to return the target lowering.

/// We need these methods implemented in the derived class so that this class

/// doesn't have to duplicate storage for them.

template <typename T>


class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {

private:

  // Shorthand for the CRTP base class this template derives from.
  using BaseT = TargetTransformInfoImplCRTPBase<T>;
  // Shorthand for the TargetTransformInfo interface type.
  using TTI = TargetTransformInfo;


  /// Returns this object viewed as a pointer to the derived type \p T.
  const T *thisT() const {
    const T *Derived = static_cast<const T *>(this);
    return Derived;
  }


  /// Estimates the cost of a broadcast shuffle expanded into element-wise
  /// operations: a single extract of element 0 followed by one insert for
  /// every element of \p VTy.
  ///
  /// Each per-element cost is obtained from the derived class through
  /// getVectorInstrCost() for the given \p CostKind.
  InstructionCost
  getBroadcastShuffleOverhead(FixedVectorType *VTy,
                              TTI::TargetCostKind CostKind) const {
    const T *Impl = thisT();

    // Read the element that is being broadcast (lane 0).
    InstructionCost Total = 0;
    Total += Impl->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                      CostKind, 0, nullptr, nullptr);

    // Write that element into each lane of the result vector.
    const int NumLanes = VTy->getNumElements();
    for (int Lane = 0; Lane < NumLanes; ++Lane)
      Total += Impl->getVectorInstrCost(Instruction::InsertElement, VTy,
                                        CostKind, Lane, nullptr, nullptr);
    return Total;
  }


  /// Estimates the cost of a general permute shuffle expanded into
  /// element-wise operations: for every lane of \p VTy, one insert into the
  /// result plus one extract from a source vector.
  ///
  /// Example: for <4 x float> and mask <0,5,2,7> the expansion extracts index
  /// 0 of the first vector, index 1 of the second, index 2 of the first and
  /// index 3 of the second, inserting them at result indices <0,1,2,3>.
  InstructionCost
  getPermuteShuffleOverhead(FixedVectorType *VTy,
                            TTI::TargetCostKind CostKind) const {
    const T *Impl = thisT();
    InstructionCost Total = 0;
    const int NumLanes = VTy->getNumElements();
    for (int Lane = 0; Lane < NumLanes; ++Lane) {
      // Cost of placing the element into the result ...
      Total += Impl->getVectorInstrCost(Instruction::InsertElement, VTy,
                                        CostKind, Lane, nullptr, nullptr);
      // ... and of pulling it out of its source vector.
      Total += Impl->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                        CostKind, Lane, nullptr, nullptr);
    }
    return Total;
  }


  /// Estimates the cost of extracting the subvector \p SubVTy from \p VTy
  /// starting at element \p Index, expanded into element-wise operations: for
  /// each subvector element, one extract from \p VTy and one insert into
  /// \p SubVTy.
  ///
  /// When \p VTy is a fixed vector, the requested range must lie inside it
  /// (checked by assertion).
  InstructionCost getExtractSubvectorOverhead(VectorType *VTy,
                                              TTI::TargetCostKind CostKind,
                                              int Index,
                                              FixedVectorType *SubVTy) const {
    assert(VTy && SubVTy && "Can only extract subvectors from vectors");
    const int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                static_cast<int>(
                    cast<FixedVectorType>(VTy)->getNumElements())) &&
           "SK_ExtractSubvector index out of range");

    const T *Impl = thisT();
    InstructionCost Total = 0;
    for (int DstLane = 0; DstLane != NumSubElts; ++DstLane) {
      // Element DstLane of the result comes from lane Index + DstLane of the
      // source vector.
      const int SrcLane = DstLane + Index;
      Total += Impl->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                        CostKind, SrcLane, nullptr, nullptr);
      Total += Impl->getVectorInstrCost(Instruction::InsertElement, SubVTy,
                                        CostKind, DstLane, nullptr, nullptr);
    }
    return Total;
  }


  /// Estimates the cost of inserting the subvector \p SubVTy into \p VTy at
  /// element \p Index, expanded into element-wise operations: for each
  /// subvector element, one extract from \p SubVTy and one insert into
  /// \p VTy.
  ///
  /// When \p VTy is a fixed vector, the target range must lie inside it
  /// (checked by assertion).
  InstructionCost getInsertSubvectorOverhead(VectorType *VTy,
                                             TTI::TargetCostKind CostKind,
                                             int Index,
                                             FixedVectorType *SubVTy) const {
    assert(VTy && SubVTy && "Can only insert subvectors into vectors");
    const int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                static_cast<int>(
                    cast<FixedVectorType>(VTy)->getNumElements())) &&
           "SK_InsertSubvector index out of range");

    const T *Impl = thisT();
    InstructionCost Total = 0;
    for (int SrcLane = 0; SrcLane != NumSubElts; ++SrcLane) {
      // Element SrcLane of the subvector lands in lane Index + SrcLane of the
      // destination vector.
      const int DstLane = SrcLane + Index;
      Total += Impl->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
                                        CostKind, SrcLane, nullptr, nullptr);
      Total += Impl->getVectorInstrCost(Instruction::InsertElement, VTy,
                                        CostKind, DstLane, nullptr, nullptr);
    }
    return Total;
  }


  /// Returns the subtarget by delegating to the derived class \p T, which
  /// *must* provide its own getST().
  const TargetSubtargetInfo *getST() const {
    const T *Derived = thisT();
    return Derived->getST();
  }


  /// Returns the target lowering by delegating to the derived class \p T,
  /// which *must* provide its own getTLI().
  const TargetLoweringBase *getTLI() const {
    const T *Derived = thisT();
    return Derived->getTLI();
  }


  /// Translates a TTI memory indexed mode into the matching ISD indexed mode.
  /// Any value outside the five enumerators handled below is treated as
  /// unreachable.
  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    // clang-format off
    switch (M) {
    case TTI::MIM_Unindexed: return ISD::UNINDEXED;
    case TTI::MIM_PreInc:    return ISD::PRE_INC;
    case TTI::MIM_PreDec:    return ISD::PRE_DEC;
    case TTI::MIM_PostInc:   return ISD::POST_INC;
    case TTI::MIM_PostDec:   return ISD::POST_DEC;
    }
    // clang-format on
    llvm_unreachable("Unexpected MemIndexedMode");
  }


  /// Rough cost estimate for a masked or gather/scatter memory operation on
  /// \p DataTy, assuming the target has no native support and the operation
  /// is scalarized.
  ///
  /// The estimate is the sum of:
  ///  - extracting the per-lane addresses (only when \p IsGatherScatter),
  ///  - one scalar memory operation per lane,
  ///  - packing the loaded lanes into a vector (or unpacking the stored
  ///    lanes when \p Opcode is Instruction::Store),
  ///  - when \p VariableMask is set: extracting each mask bit plus one
  ///    conditional branch and one PHI per lane.
  ///
  /// \returns an invalid cost for scalable vectors, which cannot be
  /// scalarized.
  InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                              Align Alignment,
                                              bool VariableMask,
                                              bool IsGatherScatter,
                                              TTI::TargetCostKind CostKind,
                                              unsigned AddressSpace = 0) const {
    if (isa<ScalableVectorType>(DataTy))
      return InstructionCost::getInvalid();

    auto *VecTy = cast<FixedVectorType>(DataTy);
    const unsigned NumElts = VecTy->getNumElements();
    const bool IsStore = Opcode == Instruction::Store;

    // Gather/scatter: every lane's pointer has to be pulled out of the
    // pointer vector first.
    InstructionCost AddrExtractCost = 0;
    if (IsGatherScatter) {
      auto *PtrVecTy = FixedVectorType::get(
          PointerType::get(VecTy->getContext(), 0), NumElts);
      AddrExtractCost = getScalarizationOverhead(
          PtrVecTy, /*Insert=*/false, /*Extract=*/true, CostKind);
    }

    // One scalar load/store per lane.
    InstructionCost MemoryOpCost =
        NumElts * thisT()->getMemoryOpCost(Opcode, VecTy->getElementType(),
                                           Alignment, AddressSpace, CostKind);

    // Loads insert the scalar results into a vector; stores extract the
    // scalars to be written.
    InstructionCost PackingCost = getScalarizationOverhead(
        VecTy, /*Insert=*/!IsStore, /*Extract=*/IsStore, CostKind);

    // With a variable mask each scalar access is executed conditionally:
    // extract the individual conditions, then branch and merge with a PHI for
    // every lane.
    // NOTE: Accurately costing the conditional execution is difficult; this
    // is only a very rough estimate.
    InstructionCost ConditionalCost = 0;
    if (VariableMask) {
      auto *MaskTy = FixedVectorType::get(
          Type::getInt1Ty(DataTy->getContext()), NumElts);
      const InstructionCost MaskExtractCost = getScalarizationOverhead(
          MaskTy, /*Insert=*/false, /*Extract=*/true, CostKind);
      const InstructionCost PerLaneControlFlow =
          thisT()->getCFInstrCost(Instruction::CondBr, CostKind) +
          thisT()->getCFInstrCost(Instruction::PHI, CostKind);
      ConditionalCost = MaskExtractCost + NumElts * PerLaneControlFlow;
    }

    return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost;
  }


  /// Checks whether \p Mask is a splat mask, i.e. every element is either
  /// PoisonMaskElem or one common index value, and that common value occurs
  /// at least twice.
  ///
  /// So, mask <0, -1, -1, -1> is not considered a splat (it is just an
  /// identity), same for <-1, 0, -1, -1> (just a slide), while
  /// <2, -1, 2, -1> is a splat with \p Index = 2.
  ///
  /// Any non-poison element that is not below 2 * \p NumSrcElts makes the
  /// check fail.
  ///
  /// \param [out] Index set to the common index value when the function
  /// returns true; left untouched otherwise.
  ///
  /// NOTE(review): for an empty \p Mask the predicate never runs, so this
  /// appears to return true with \p Index = PoisonMaskElem -- confirm callers
  /// never pass an empty mask.
  static bool isSplatMask(ArrayRef<int> Mask, unsigned NumSrcElts, int &Index) {
    // IsCompared records that a second occurrence of the splat index has been
    // seen, i.e. the index occurs at least twice.
    bool IsCompared = false;
    if (int SplatIdx = PoisonMaskElem;
        all_of(enumerate(Mask), [&](const auto &P) {
          // Poison elements are accepted, except that reaching the last
          // element without any second occurrence fails the check.
          if (P.value() == PoisonMaskElem)
            return P.index() != Mask.size() - 1 || IsCompared;
          // Reject indices outside the two source vectors. The unsigned cast
          // also rejects negative values other than PoisonMaskElem.
          if (static_cast<unsigned>(P.value()) >= NumSrcElts * 2)
            return false;
          // First non-poison element: remember it as the candidate index. If
          // it is also the last element it cannot occur twice.
          if (SplatIdx == PoisonMaskElem) {
            SplatIdx = P.value();
            return P.index() != Mask.size() - 1;
          }
          // Every later non-poison element must repeat the candidate index.
          IsCompared = true;
          return SplatIdx == P.value();
        })) {
      Index = SplatIdx;
      return true;
    }
    return false;
  }


  /// Several intrinsics that return structs (including llvm.sincos[pi] and
  /// llvm.modf) can be lowered to a vector library call (for certain VFs). The
  /// vector library functions correspond to the scalar calls (e.g. sincos or
  /// modf), which unlike the intrinsic return values via output pointers. This
  /// helper checks if a vector call exists for the given intrinsic, and returns
  /// the cost, which includes the cost of the mask (if required), and the loads
  /// for values returned via output pointers.
  ///
  /// The libcall is selected from the intrinsic ID of \p ICA (modf, sincospi
  /// or sincos) and the value type of the first contained type of the return
  /// struct. \p CallRetElementIndex (optional) is the struct element which is
  /// mapped to the call return value; no reload is costed for that element.
  ///
  /// \returns std::nullopt if no vector library call is available (the return
  /// type is not a vectorized struct, the intrinsic is not one of those handled
  /// here, or the libcall implementation is unsupported), in which case the
  /// intrinsic should be assigned the default cost (e.g. scalarization).
  std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost(
      const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
      std::optional<unsigned> CallRetElementIndex = {}) const {
    Type *RetTy = ICA.getReturnType();
    // Vector variants of the intrinsic can be mapped to a vector library call.
    if (!isa<StructType>(RetTy) ||
        !isVectorizedStructTy(cast<StructType>(RetTy)))
      return std::nullopt;

    // The libcall is looked up using the first element type of the struct.
    Type *Ty = getContainedTypes(RetTy).front();
    EVT VT = getTLI()->getValueType(DL, Ty);

    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;

    switch (ICA.getID()) {
    case Intrinsic::modf:
      LC = RTLIB::getMODF(VT);
      break;
    case Intrinsic::sincospi:
      LC = RTLIB::getSINCOSPI(VT);
      break;
    case Intrinsic::sincos:
      LC = RTLIB::getSINCOS(VT);
      break;
    default:
      return std::nullopt;
    }

    // Find associated libcall.
    RTLIB::LibcallImpl LibcallImpl = getTLI()->getLibcallImpl(LC);
    if (LibcallImpl == RTLIB::Unsupported)
      return std::nullopt;

    LLVMContext &Ctx = RetTy->getContext();

    // Cost the call + mask.
    auto Cost =
        thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);

    // If the library function takes a mask argument, add the cost of a
    // broadcast shuffle on an i1 vector of the same VF.
    if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) {
      ElementCount VF = getVectorizedTypeVF(RetTy);
      auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
      Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
                                      VecTy, {}, CostKind, 0, nullptr, {});
    }

    // Lowering to a library call (with output pointers) may require us to emit
    // reloads for the results.
    for (auto [Idx, VectorTy] : enumerate(getContainedTypes(RetTy))) {
      // The element returned directly by the call needs no reload.
      if (Idx == CallRetElementIndex)
        continue;
      Cost += thisT()->getMemoryOpCost(
          Instruction::Load, VectorTy,
          thisT()->getDataLayout().getABITypeAlign(VectorTy), 0, CostKind);
    }
    return Cost;
  }


  /// Returns the types from \p Tys whose corresponding operand in \p Ops is
  /// neither a Constant nor a repeat of an earlier non-constant operand.
  ///
  /// \p Ops and \p Tys are walked in lockstep with zip_equal, so they are
  /// expected to have the same length. The relative order of the surviving
  /// types is preserved.
  static SmallVector<Type *, 4>
  filterConstantAndDuplicatedOperands(ArrayRef<const Value *> Ops,
                                      ArrayRef<Type *> Tys) {
    SmallVector<Type *, 4> KeptTys;
    SmallPtrSet<const Value *, 4> SeenOperands;
    for (const auto &[Operand, OperandTy] : zip_equal(Ops, Tys)) {
      // Constants are dropped and never recorded as seen.
      if (isa<Constant>(Operand))
        continue;
      // insert() reports whether the operand is new; repeats are dropped.
      const bool IsFirstUse = SeenOperands.insert(Operand).second;
      if (IsFirstUse)
        KeptTys.push_back(OperandTy);
    }
    return KeptTys;
  }


protected:


  /// Constructs the base by forwarding \p DL to BaseT. \p TM is accepted but
  /// not used by this constructor.
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}


  /// Defaulted destructor; overrides the base class destructor.
  ~BasicTTIImplBase() override = default;


  // Make the base-class DL member and getScalarizationOverhead overloads
  // visible by unqualified name inside this class template.
  using TargetTransformInfoImplBase::DL;
  using TargetTransformInfoImplBase::getScalarizationOverhead;


public:

  /// \name Scalar TTI Implementations

  /// @{


  /// Asks the target lowering whether a misaligned access of an integer that
  /// is \p BitWidth bits wide is allowed in \p AddressSpace with alignment
  /// \p Alignment. The query is made with no memory-operand flags
  /// (MachineMemOperand::MONone); \p Fast is passed through unchanged.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const override {
    const TargetLoweringBase *TLI = getTLI();
    const EVT IntVT = EVT::getIntegerVT(Context, BitWidth);
    return TLI->allowsMisalignedMemoryAccesses(
        IntVT, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
  }


  bool areInlineCompatible(const Function *Caller,

                           const Function *Callee) const override {

    const TargetMachine &TM = getTLI()->getTargetMachine();


    const TargetSubtargetInfo *CallerSTI = TM.getSubtargetImpl(*Caller);

    const TargetSubtargetInfo *CalleeSTI = TM.getSubtargetImpl(*Callee);

    FeatureBitset InlineIgnoreFeatures = CallerSTI->getInlineIgnoreFeatures();

    FeatureBitset InlineInverseFeatures = CallerSTI->getInlineInverseFeatures();

    FeatureBitset InlineMustMatchFeatures =

        CallerSTI->getInlineMustMatchFeatures();


    FeatureBitset CallerBits =

        (CallerSTI->getFeatureBits() ^ InlineInverseFeatures) &

        ~InlineIgnoreFeatures;

    FeatureBitset CalleeBits =

        (CalleeSTI->getFeatureBits() ^ InlineInverseFeatures) &

        ~InlineIgnoreFeatures;


    if ((CallerBits & InlineMustMatchFeatures) !=

        (CalleeBits & InlineMustMatchFeatures))

      return false;


    // Inline a callee if its target-features are a subset of the callers

    // target-features.

    return (CallerBits & CalleeBits) == CalleeBits;

  }


  /// Always returns false; \p F is not consulted.
  bool hasBranchDivergence(const Function *F = nullptr) const override {
    return false;
  }


  /// Always returns false, regardless of \p FromAS and \p ToAS.
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return false;
  }


  /// Always returns true, i.e. any two address spaces are assumed to
  /// possibly alias.
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return true;
  }


  /// Returns an invalid address space number (all bits set, i.e. the value
  /// -1 converted to unsigned).
  unsigned getFlatAddressSpace() const override {
    const unsigned InvalidAddressSpace = ~0U;
    return InvalidAddressSpace;
  }


  /// Always returns false and leaves \p OpIndexes untouched.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return false;
  }


  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {

    return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);

  }


  unsigned getAssumedAddrSpace(const Value *V) const override {

    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);

  }


  bool isSingleThreaded() const override {

    return getTLI()->getTargetMachine().Options.ThreadModel ==

           ThreadModel::Single;

  }


  std::pair<const Value *, unsigned>


  getPredicatedAddrSpace(const Value *V) const override {

    return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);

  }


  /// Always returns nullptr; none of the arguments are inspected.
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return nullptr;
  }


  /// Forwards to the target lowering's isLegalAddImmediate() for \p imm.
  bool isLegalAddImmediate(int64_t imm) const override {
    return getTLI()->isLegalAddImmediate(imm);
  }


  /// Forwards to the target lowering's isLegalAddScalableImmediate() for
  /// \p Imm.
  bool isLegalAddScalableImmediate(int64_t Imm) const override {
    return getTLI()->isLegalAddScalableImmediate(Imm);
  }


  /// Forwards to the target lowering's isLegalICmpImmediate() for \p imm.
  bool isLegalICmpImmediate(int64_t imm) const override {
    return getTLI()->isLegalICmpImmediate(imm);
  }


  /// Packs the individual addressing components into a
  /// TargetLoweringBase::AddrMode and asks the target lowering whether that
  /// mode is legal for an access of type \p Ty in address space \p AddrSpace.
  /// \p I is passed through to the target lowering query.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) const override {
    TargetLoweringBase::AddrMode Mode;
    Mode.BaseGV = BaseGV;
    Mode.HasBaseReg = HasBaseReg;
    Mode.BaseOffs = BaseOffset;
    Mode.ScalableOffset = ScalableOffset;
    Mode.Scale = Scale;

    const TargetLoweringBase *TLI = getTLI();
    return TLI->isLegalAddressingMode(DL, Mode, Ty, AddrSpace, I);
  }


  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) {

    return getTLI()->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);

  }


  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy,

                             Align Alignment,

                             unsigned AddrSpace) const override {

    auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy, Alignment,

                                  AddrSpace](unsigned VF) {

      auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2);

      EVT VT = getTLI()->getValueType(DL, SrcTy);

      if (getTLI()->isOperationLegal(ISD::STORE, VT) ||

          getTLI()->isOperationCustom(ISD::STORE, VT))

        return true;


      EVT ValVT =

          getTLI()->getValueType(DL, FixedVectorType::get(ScalarValTy, VF / 2));

      EVT LegalizedVT =

          getTLI()->getTypeToTransformTo(ScalarMemTy->getContext(), VT);

      return getTLI()->isTruncStoreLegal(LegalizedVT, ValVT, Alignment,

                                         AddrSpace);

    };

    while (VF > 2 && IsSupportedByTarget(VF))

      VF /= 2;

    return VF;

  }


  /// Asks the target lowering whether an indexed load with mode \p M is legal
  /// for the value type of \p Ty. The value type lookup allows unknown types.
  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty) const override {
    const TargetLoweringBase *TLI = getTLI();
    const EVT VT = TLI->getValueType(DL, Ty, /*AllowUnknown=*/true);
    const ISD::MemIndexedMode Mode = getISDIndexedMode(M);
    return TLI->isIndexedLoadLegal(Mode, VT);
  }


  /// Asks the target lowering whether an indexed store with mode \p M is legal
  /// for the value type of \p Ty. The value type lookup allows unknown types.
  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty) const override {
    const TargetLoweringBase *TLI = getTLI();
    const EVT VT = TLI->getValueType(DL, Ty, /*AllowUnknown=*/true);
    const ISD::MemIndexedMode Mode = getISDIndexedMode(M);
    return TLI->isIndexedStoreLegal(Mode, VT);
  }


  /// Uses the TargetTransformInfoImplBase implementation unchanged.
  bool isLSRCostLess(const TTI::LSRCost &C1,
                     const TTI::LSRCost &C2) const override {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }


  /// Uses the TargetTransformInfoImplBase implementation unchanged.
  bool isNumRegsMajorCostOfLSR() const override {
    return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
  }


  /// Uses the TargetTransformInfoImplBase implementation unchanged.
  bool shouldDropLSRSolutionIfLessProfitable() const override {
    return TargetTransformInfoImplBase::shouldDropLSRSolutionIfLessProfitable();
  }


  /// Uses the TargetTransformInfoImplBase implementation unchanged.
  bool isProfitableLSRChainElement(Instruction *I) const override {
    return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
  }


  /// Returns the cost of using the given scaled addressing mode for an access
  /// of type \p Ty in address space \p AddrSpace.
  ///
  /// The fixed and scalable parts of \p BaseOffset are placed into the
  /// corresponding AddrMode fields.
  ///
  /// \returns 0 when the target lowering reports the addressing mode as
  /// legal, and an invalid cost otherwise.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override {
    TargetLoweringBase::AddrMode Mode;
    Mode.BaseGV = BaseGV;
    Mode.HasBaseReg = HasBaseReg;
    Mode.Scale = Scale;
    Mode.BaseOffs = BaseOffset.getFixed();
    Mode.ScalableOffset = BaseOffset.getScalable();

    const bool IsLegal =
        getTLI()->isLegalAddressingMode(DL, Mode, Ty, AddrSpace);
    if (!IsLegal)
      return InstructionCost::getInvalid();
    return 0;
  }


  /// Forwards to the target lowering's isTruncateFree() for \p Ty1 and
  /// \p Ty2.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }


  /// Forwards to the target lowering's isProfitableToHoist() for \p I.
  bool isProfitableToHoist(Instruction *I) const override {
    return getTLI()->isProfitableToHoist(I);
  }


  /// Forwards to the subtarget's useAA().
  bool useAA() const override { return getST()->useAA(); }


  /// Returns whether the target lowering considers the value type of \p Ty
  /// legal. The value type lookup allows unknown types.
  bool isTypeLegal(Type *Ty) const override {
    const TargetLoweringBase *TLI = getTLI();
    const EVT VT = TLI->getValueType(DL, Ty, /*AllowUnknown=*/true);
    return TLI->isTypeLegal(VT);
  }


  /// Returns the number of registers the target lowering reports for the
  /// value type of \p Ty.
  unsigned getRegUsageForType(Type *Ty) const override {
    const TargetLoweringBase *TLI = getTLI();
    const EVT VT = TLI->getValueType(DL, Ty);
    return TLI->getNumRegisters(Ty->getContext(), VT);
  }


  /// Uses the BaseT implementation unchanged; all arguments are forwarded
  /// as-is.
  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) const override {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
  }


  unsigned getEstimatedNumberOfCaseClusters(

      const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI,

      BlockFrequencyInfo *BFI) const override {

    /// Try to find the estimated number of clusters. Note that the number of

    /// clusters identified in this function could be different from the actual

    /// numbers found in lowering. This function ignore switches that are

    /// lowered with a mix of jump table / bit test / BTree. This function was

    /// initially intended to be used when estimating the cost of switch in

    /// inline cost heuristic, but it's a generic cost model to be used in other

    /// places (e.g., in loop unrolling).

    unsigned N = SI.getNumCases();

    const TargetLoweringBase *TLI = getTLI();

    const DataLayout &DL = this->getDataLayout();


    JumpTableSize = 0;

    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());


    // Early exit if both a jump table and bit test are not allowed.

    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))

      return N;


    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();

    APInt MinCaseVal = MaxCaseVal;

    for (auto CI : SI.cases()) {

      const APInt &CaseVal = CI.getCaseValue()->getValue();

      if (CaseVal.sgt(MaxCaseVal))

        MaxCaseVal = CaseVal;

      if (CaseVal.slt(MinCaseVal))

        MinCaseVal = CaseVal;

    }


    // Check if suitable for a bit test

    if (N <= DL.getIndexSizeInBits(0u)) {

      DenseMap<const BasicBlock *, unsigned int> DestMap;

      for (auto I : SI.cases()) {

        const BasicBlock *BB = I.getCaseSuccessor();

        ++DestMap[BB];

      }


      if (TLI->isSuitableForBitTests(DestMap, MinCaseVal, MaxCaseVal, DL))

        return 1;

    }


    // Check if suitable for a jump table.

    if (IsJTAllowed) {

      if (N < 2 || N < TLI->getMinimumJumpTableEntries())

        return N;

      uint64_t Range =

          (MaxCaseVal - MinCaseVal)

              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;

      // Check whether a range of clusters is dense enough for a jump table

      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {

        JumpTableSize = Range;

        return 1;

      }

    }

    return N;

  }


  /// Returns true when the target lowering reports ISD::BR_JT or ISD::BRIND
  /// as legal or custom for MVT::Other.
  bool shouldBuildLookupTables() const override {
    const TargetLoweringBase *TLI = getTLI();
    if (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other))
      return true;
    return TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }


  bool shouldBuildRelLookupTables() const override {

    const TargetMachine &TM = getTLI()->getTargetMachine();

    // If non-PIC mode, do not generate a relative lookup table.

    if (!TM.isPositionIndependent())

      return false;


    /// Relative lookup table entries consist of 32-bit offsets.

    /// Do not generate relative lookup tables for large code models

    /// in 64-bit achitectures where 32-bit offsets might not be enough.

    if (TM.getCodeModel() == CodeModel::Medium ||

        TM.getCodeModel() == CodeModel::Large)

      return false;


    const Triple &TargetTriple = TM.getTargetTriple();

    if (!TargetTriple.isArch64Bit())

      return false;


    // TODO: Triggers issues on aarch64 on darwin, so temporarily disable it

    // there.

    if (TargetTriple.getArch() == Triple::aarch64 && TargetTriple.isOSDarwin())

      return false;


    return true;

  }


  /// Returns true when the value type of \p Ty is legal and ISD::FSQRT is
  /// legal or custom for it, according to the target lowering.
  bool haveFastSqrt(Type *Ty) const override {
    const TargetLoweringBase *TLI = getTLI();
    const EVT VT = TLI->getValueType(DL, Ty);
    if (!TLI->isTypeLegal(VT))
      return false;
    return TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }


  /// Always returns true; \p Ty is not consulted.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const override { return true; }


  /// Returns a coarse cost for floating-point operations on \p Ty.
  ///
  /// Availability of FADD is used as a proxy for floating-point support in
  /// general: TCC_Basic when ISD::FADD is legal, custom or promoted for the
  /// value type of \p Ty, TCC_Expensive otherwise.
  InstructionCost getFPOpCost(Type *Ty) const override {
    const TargetLoweringBase *TLI = getTLI();
    const EVT VT = TLI->getValueType(DL, Ty);
    const bool HasFAdd =
        TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT);
    return HasFAdd ? TargetTransformInfo::TCC_Basic
                   : TargetTransformInfo::TCC_Expensive;
  }


  bool preferToKeepConstantsAttached(const Instruction &Inst,

                                     const Function &Fn) const override {

    switch (Inst.getOpcode()) {

    default:

      break;

    case Instruction::SDiv:

    case Instruction::SRem:

    case Instruction::UDiv:

    case Instruction::URem: {

      if (!isa<ConstantInt>(Inst.getOperand(1)))

        return false;

      EVT VT = getTLI()->getValueType(DL, Inst.getType());

      return !getTLI()->isIntDivCheap(VT, Fn.getAttributes());

    }

    };


    return false;

  }


  /// Always returns 1.
  unsigned getInliningThresholdMultiplier() const override { return 1; }


  /// Always returns 0; \p CB is not consulted.
  unsigned adjustInliningThreshold(const CallBase *CB) const override {
    return 0;
  }


  /// Always returns 0; neither \p CB nor \p AI is consulted.
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override {
    return 0;
  }


  /// Always returns 150.
  int getInlinerVectorBonusPercent() const override { return 150; }


  /// Fills in target-independent partial/runtime unrolling preferences in
  /// \p UP for loop \p L.
  ///
  /// The partial unrolling threshold is taken from the
  /// PartialUnrollingThreshold command-line option when it was given
  /// explicitly, otherwise from the subtarget scheduling model's
  /// LoopMicroOpBufferSize. If neither provides a positive value, \p UP is
  /// left untouched. \p UP is also left untouched when the loop contains a
  /// call or invoke that is not known to be lowered to something other than a
  /// call; in that case a remark is emitted through \p ORE if it is non-null.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:
    //
    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.
    //
    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches
    //  - The loop must have less than 40 uops in all executed loop branches
    //
    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    if (PartialUnrollingThreshold.getNumOccurrences() > 0) {
      // An explicit command-line threshold takes precedence.
      MaxOps = PartialUnrollingThreshold;
    } else {
      const auto BufferSize = getST()->getSchedModel().LoopMicroOpBufferSize;
      if (!(BufferSize > 0))
        return;
      MaxOps = BufferSize;
    }

    // Scan the loop: don't unroll loops with calls.
    for (BasicBlock *Block : L->blocks()) {
      for (Instruction &Inst : *Block) {
        if (!isa<CallInst>(Inst) && !isa<InvokeInst>(Inst))
          continue;

        // A call with a known callee that is not lowered to an actual call
        // does not block unrolling.
        const Function *Callee = cast<CallBase>(Inst).getCalledFunction();
        if (Callee && !thisT()->isLoweredToCall(Callee))
          continue;

        if (ORE) {
          ORE->emit([&]() {
            return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
                                      L->getHeader())
                   << "advising against unrolling the loop because it "
                      "contains a "
                   << ore::NV("Call", &Inst);
          });
        }
        return;
      }
    }

    // Enable runtime and partial unrolling up to the specified size.
    // Enable using trip count upper bound to unroll loops.
    UP.Partial = true;
    UP.Runtime = true;
    UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Set number of instructions optimized when "back edge"
    // becomes "fall through" to default value of 2.
    UP.BEInsns = 2;
  }


  /// Initializes \p PP: peeling is allowed, peeling of loop nests is not,
  /// profiled iterations may be peeled, and the peel count starts at 0.
  /// \p L and \p SE are not consulted.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override {
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
    PP.PeelCount = 0;
  }


  /// Hardware-loop profitability is not refined at this level; the query is
  /// answered by BaseT.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const override {
    const bool Profitable =
        BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
    return Profitable;
  }


  /// Minimum VF for epilogue vectorization, as reported by BaseT.
  unsigned getEpilogueVectorizationMinVF() const override {
    const unsigned MinVF = BaseT::getEpilogueVectorizationMinVF();
    return MinVF;
  }


  /// Tail-folding preference for \p TFI, as decided by BaseT.
  bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override {
    const bool PreferTailFolding = BaseT::preferTailFoldingOverEpilogue(TFI);
    return PreferTailFolding;
  }


  /// Preferred tail-folding style, as reported by BaseT.
  TailFoldingStyle getPreferredTailFoldingStyle() const override {
    TailFoldingStyle Style = BaseT::getPreferredTailFoldingStyle();
    return Style;
  }


  /// Target hook for combining the intrinsic call \p II; this layer adds
  /// nothing and returns whatever BaseT returns.
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override {
    std::optional<Instruction *> Combined =
        BaseT::instCombineIntrinsic(IC, II);
    return Combined;
  }


  /// Target hook for demanded-bits simplification of the intrinsic call
  /// \p II; this layer adds nothing and returns whatever BaseT returns.
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const override {
    std::optional<Value *> Simplified =
        BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                KnownBitsComputed);
    return Simplified;
  }


  /// Target hook for demanded-elements simplification of the intrinsic call
  /// \p II; this layer adds nothing and returns whatever BaseT returns.
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override {
    std::optional<Value *> Simplified =
        BaseT::simplifyDemandedVectorEltsIntrinsic(IC, II, DemandedElts,
                                                   UndefElts, UndefElts2,
                                                   UndefElts3,
                                                   SimplifyAndSetOp);
    return Simplified;
  }


  /// Cache size for \p Level, as reported by the subtarget.
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const override {
    // The subtarget is queried with the cache level as a plain unsigned.
    const unsigned LevelIdx = static_cast<unsigned>(Level);
    return std::optional<unsigned>(getST()->getCacheSize(LevelIdx));
  }


  /// Cache associativity for \p Level. The subtarget's answer wins when it
  /// has one; otherwise the BaseT default is used.
  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override {
    const unsigned LevelIdx = static_cast<unsigned>(Level);
    if (std::optional<unsigned> FromSubtarget =
            getST()->getCacheAssociativity(LevelIdx))
      return FromSubtarget;

    return BaseT::getCacheAssociativity(Level);
  }


  /// Cache line size, as reported by the subtarget.
  unsigned getCacheLineSize() const override {
    const unsigned LineSize = getST()->getCacheLineSize();
    return LineSize;
  }


  /// Prefetch distance, as reported by the subtarget.
  unsigned getPrefetchDistance() const override {
    const unsigned Distance = getST()->getPrefetchDistance();
    return Distance;
  }


  /// Minimum prefetch stride for the given access statistics, as reported by
  /// the subtarget.
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    const unsigned MinStride = getST()->getMinPrefetchStride(
        NumMemAccesses, NumStridedMemAccesses, NumPrefetches, HasCall);
    return MinStride;
  }


  /// Maximum number of iterations to prefetch ahead, as reported by the
  /// subtarget.
  unsigned getMaxPrefetchIterationsAhead() const override {
    const unsigned MaxItersAhead = getST()->getMaxPrefetchIterationsAhead();
    return MaxItersAhead;
  }


  /// Whether write prefetching is enabled, as reported by the subtarget.
  bool enableWritePrefetching() const override {
    const bool Enabled = getST()->enableWritePrefetching();
    return Enabled;
  }


  /// Whether address space \p AS should be prefetched, as reported by the
  /// subtarget.
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    const bool ShouldPrefetch = getST()->shouldPrefetchAddressSpace(AS);
    return ShouldPrefetch;
  }


  /// @}


  /// \name Vector TTI Implementations

  /// @{


  /// Default register width: a fixed 32 bits for every register kind \p K.
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    constexpr unsigned DefaultRegisterBits = 32;
    return TypeSize::getFixed(DefaultRegisterBits);
  }


  /// No maximum vscale is known at this level.
  std::optional<unsigned> getMaxVScale() const override {
    return std::nullopt;
  }


  /// No vscale tuning value is known at this level.
  std::optional<unsigned> getVScaleForTuning() const override { return std::nullopt; }


  /// Estimate the overhead of scalarizing an instruction over the lanes of
  /// \p InTy selected by \p DemandedElts.
  ///
  /// For every demanded lane, an insertelement cost is added when \p Insert is
  /// set and an extractelement cost is added when \p Extract is set. When
  /// \p VL is non-empty, VL[lane] is forwarded as the inserted value.
  /// Scalable vectors yield an invalid cost.
  InstructionCost
  getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts,
                           bool Insert, bool Extract,
                           TTI::TargetCostKind CostKind,
                           bool ForPoisonSrc = true, ArrayRef<Value *> VL = {},
                           TTI::VectorInstrContext VIC =
                               TTI::VectorInstrContext::None) const override {
    /// FIXME: a bitfield is not a reasonable abstraction for talking about
    /// which elements are needed from a scalable vector
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();
    auto *Ty = cast<FixedVectorType>(InTy);

    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           (VL.empty() || VL.size() == Ty->getNumElements()) &&
           "Vector size mismatch");

    const unsigned NumLanes = Ty->getNumElements();
    const bool HaveLaneValues = !VL.empty();

    InstructionCost Overhead = 0;
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      // Lanes nobody asked for contribute nothing.
      if (!DemandedElts[Lane])
        continue;

      if (Insert) {
        Value *LaneValue = HaveLaneValues ? VL[Lane] : nullptr;
        Overhead += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty,
                                                CostKind, Lane, nullptr,
                                                LaneValue, VIC);
      }

      if (Extract) {
        Overhead += thisT()->getVectorInstrCost(Instruction::ExtractElement,
                                                Ty, CostKind, Lane, nullptr,
                                                nullptr, VIC);
      }
    }

    return Overhead;
  }


  /// Default: no target intrinsic is reported as taking a scalar operand at
  /// \p ScalarOpdIdx.
  bool
  isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                     unsigned ScalarOpdIdx) const override {
    constexpr bool HasScalarOperand = false;
    return HasScalarOperand;
  }


  /// Default: only operand index -1 is reported as overloaded.
  bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                              int OpdIdx) const override {
    constexpr int OverloadedIdx = -1;
    return OpdIdx == OverloadedIdx;
  }


  /// Default: only struct-return field 0 is reported as overloaded.
  bool
  isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                   int RetIdx) const override {
    constexpr int OverloadedField = 0;
    return RetIdx == OverloadedField;
  }


  /// Convenience overload of the DemandedElts variant of
  /// getScalarizationOverhead that demands every lane of \p InTy.
  /// Scalable vectors yield an invalid cost.
  InstructionCost getScalarizationOverhead(
      VectorType *InTy, bool Insert, bool Extract, TTI::TargetCostKind CostKind,
      bool ForPoisonSrc = true, ArrayRef<Value *> VL = {},
      TTI::VectorInstrContext VIC = TTI::VectorInstrContext::None) const {
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();

    auto *FixedTy = cast<FixedVectorType>(InTy);
    const APInt AllLanes = APInt::getAllOnes(FixedTy->getNumElements());

    // Dispatch through thisT() (CRTP) so a target override is picked up.
    return thisT()->getScalarizationOverhead(FixedTy, AllLanes, Insert,
                                             Extract, CostKind, ForPoisonSrc,
                                             VL, VIC);
  }


  /// Estimate the overhead of scalarizing an instruction's operands. The
  /// (potentially vector) type of each operand is passed via \p Tys; every
  /// vector-typed operand contributes the cost of extracting all its lanes.
  InstructionCost getOperandsScalarizationOverhead(
      ArrayRef<Type *> Tys, TTI::TargetCostKind CostKind,
      TTI::VectorInstrContext VIC =
          TTI::VectorInstrContext::None) const override {
    InstructionCost Overhead = 0;
    for (Type *OperandTy : Tys) {
      // Only int, FP and pointer (vector) types count; this disregards
      // things like metadata arguments.
      const bool IsDataTy = OperandTy->isIntOrIntVectorTy() ||
                            OperandTy->isFPOrFPVectorTy() ||
                            OperandTy->isPtrOrPtrVectorTy();
      if (!IsDataTy)
        continue;

      auto *VecTy = dyn_cast<VectorType>(OperandTy);
      if (!VecTy)
        continue;

      Overhead += getScalarizationOverhead(VecTy, /*Insert*/ false,
                                           /*Extract*/ true, CostKind,
                                           /*ForPoisonSrc=*/true, {}, VIC);
    }

    return Overhead;
  }


  /// Estimate the overhead of scalarizing both the result and the operands
  /// of an instruction with return type \p RetTy and arguments \p Args of
  /// types \p Tys. If \p Args is empty (unknown), the cost of extracting one
  /// operand of type \p RetTy is added as a heuristic.
  InstructionCost getScalarizationOverhead(VectorType *RetTy,
                                           ArrayRef<const Value *> Args,
                                           ArrayRef<Type *> Tys,
                                           TTI::TargetCostKind CostKind) const {
    // The scalar results are always inserted into the return vector.
    InstructionCost Overhead = getScalarizationOverhead(
        RetTy, /*Insert*/ true, /*Extract*/ false, CostKind);

    if (Args.empty()) {
      // No operand information: charge one operand's worth of extracts.
      Overhead += getScalarizationOverhead(RetTy, /*Insert*/ false,
                                           /*Extract*/ true, CostKind);
      return Overhead;
    }

    Overhead += getOperandsScalarizationOverhead(
        filterConstantAndDuplicatedOperands(Args, Tys), CostKind);
    return Overhead;
  }


  /// Estimate the cost of type-legalization and the legalized type.
  ///
  /// Starting at cost 1, the type is repeatedly converted via the target
  /// lowering until it is legal. Each vector split or integer expansion
  /// doubles the cost, as two values must be handled afterwards; all other
  /// legalization steps are assumed free.
  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const {
    LLVMContext &Ctx = Ty->getContext();
    EVT CurVT = getTLI()->getValueType(DL, Ty);

    InstructionCost Cost = 1;
    for (;;) {
      const TargetLoweringBase::LegalizeKind Step =
          getTLI()->getTypeConversion(Ctx, CurVT);

      switch (Step.first) {
      case TargetLoweringBase::TypeScalarizeScalableVector: {
        // Ensure we return a sensible simple VT here, since many callers of
        // this function require it.
        MVT VT = CurVT.isSimple() ? CurVT.getSimpleVT() : MVT::i64;
        return std::make_pair(InstructionCost::getInvalid(), VT);
      }
      case TargetLoweringBase::TypeLegal:
        return std::make_pair(Cost, CurVT.getSimpleVT());
      case TargetLoweringBase::TypeSplitVector:
      case TargetLoweringBase::TypeExpandInteger:
        Cost *= 2;
        break;
      default:
        break;
      }

      // Do not loop with f128 type.
      if (CurVT == Step.second)
        return std::make_pair(Cost, CurVT.getSimpleVT());

      // Keep legalizing the type.
      CurVT = Step.second;
    }
  }


  /// Default maximum interleave factor: 1, regardless of \p VF or
  /// \p HasUnorderedReductions.
  unsigned getMaxInterleaveFactor(ElementCount VF,
                                  bool HasUnorderedReductions) const override {
    constexpr unsigned DefaultFactor = 1;
    return DefaultFactor;
  }


  /// Estimate the cost of the arithmetic instruction \p Opcode on \p Ty.
  ///
  /// Only TTI::TCK_RecipThroughput is modelled here; every other cost kind is
  /// answered by BaseT. The estimate is the type-legalization factor times a
  /// per-op cost (2 for FP, 1 otherwise), doubled when the operation is
  /// neither legal/promoted nor expanded. Expanded urem/srem are costed as
  /// div + mul + sub when a suitable division is available, and any other
  /// expanded fixed-vector operation is costed as scalarized.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override {
    const TargetLoweringBase *Lowering = getTLI();
    int ISD = Lowering->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle more cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                           Opd2Info, Args, CxtI);

    const std::pair<InstructionCost, MVT> Legalized =
        getTypeLegalizationCost(Ty);
    const InstructionCost &LegalizationFactor = Legalized.first;
    const MVT LegalVT = Legalized.second;

    // Assume that floating point arithmetic operations cost twice as much as
    // integer operations.
    InstructionCost OpCost = Ty->isFPOrFPVectorTy() ? 2 : 1;

    // The operation is legal. Assume it costs 1.
    // TODO: Once we have extract/insert subvector cost we need to use them.
    if (Lowering->isOperationLegalOrPromote(ISD, LegalVT))
      return LegalizationFactor * OpCost;

    // If the operation is custom lowered, then assume that the code is twice
    // as expensive.
    if (!Lowering->isOperationExpand(ISD, LegalVT))
      return LegalizationFactor * 2 * OpCost;

    // An 'Expand' of URem and SRem is special because it may default
    // to expanding the operation into a sequence of sub-operations
    // i.e. X % Y -> X-(X/Y)*Y.
    if (ISD == ISD::UREM || ISD == ISD::SREM) {
      const bool IsSigned = ISD == ISD::SREM;
      const bool HasDivRem = Lowering->isOperationLegalOrCustom(
          IsSigned ? ISD::SDIVREM : ISD::UDIVREM, LegalVT);
      if (HasDivRem ||
          Lowering->isOperationLegalOrCustom(IsSigned ? ISD::SDIV : ISD::UDIV,
                                             LegalVT)) {
        const unsigned DivOpcode =
            IsSigned ? Instruction::SDiv : Instruction::UDiv;
        InstructionCost DivCost = thisT()->getArithmeticInstrCost(
            DivOpcode, Ty, CostKind, Opd1Info, Opd2Info);
        InstructionCost MulCost =
            thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
        InstructionCost SubCost =
            thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
        return DivCost + MulCost + SubCost;
      }
    }

    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    // We don't know anything about this scalar instruction.
    auto *VTy = dyn_cast<FixedVectorType>(Ty);
    if (!VTy)
      return OpCost;

    // Else, assume that we need to scalarize this op.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    InstructionCost ScalarCost = thisT()->getArithmeticInstrCost(
        Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info, Args, CxtI);

    // Return the cost of multiple scalar invocation plus the cost of
    // inserting and extracting the values.
    SmallVector<Type *> ArgTys(Args.size(), Ty);
    return getScalarizationOverhead(VTy, Args, ArgTys, CostKind) +
           VTy->getNumElements() * ScalarCost;
  }


  /// Try to refine a generic permute \p Kind into a more specific shuffle
  /// kind by inspecting \p Mask.
  ///
  /// Only TTI::SK_PermuteSingleSrc and TTI::SK_PermuteTwoSrc are refined; all
  /// other kinds, and any empty mask, are returned unchanged. \p Index is
  /// passed by reference to the mask matchers. \p SubTy is set only when the
  /// result is TTI::SK_ExtractSubvector or TTI::SK_InsertSubvector.
  TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
                                              ArrayRef<int> Mask,
                                              VectorType *SrcTy, int &Index,
                                              VectorType *&SubTy) const {
    if (Mask.empty())
      return Kind;

    const int DstLanes = Mask.size();
    const int SrcLanes = SrcTy->getElementCount().getKnownMinValue();

    switch (Kind) {
    case TTI::SK_PermuteSingleSrc: {
      if (ShuffleVectorInst::isReverseMask(Mask, SrcLanes))
        return TTI::SK_Reverse;
      if (ShuffleVectorInst::isZeroEltSplatMask(Mask, SrcLanes))
        return TTI::SK_Broadcast;
      if (isSplatMask(Mask, SrcLanes, Index))
        return TTI::SK_Broadcast;

      const bool IsExtract =
          ShuffleVectorInst::isExtractSubvectorMask(Mask, SrcLanes, Index) &&
          (Index + DstLanes) <= SrcLanes;
      if (!IsExtract)
        break;

      SubTy = FixedVectorType::get(SrcTy->getElementType(), DstLanes);
      return TTI::SK_ExtractSubvector;
    }
    case TTI::SK_PermuteTwoSrc: {
      // If every mask element is below the source lane count, retry as a
      // single-source permute.
      if (all_of(Mask, [SrcLanes](int Elt) { return Elt < SrcLanes; }))
        return improveShuffleKindFromMask(TTI::SK_PermuteSingleSrc, Mask, SrcTy,
                                          Index, SubTy);

      int SubLanes;
      if (DstLanes > 2 && ShuffleVectorInst::isInsertSubvectorMask(
                              Mask, SrcLanes, SubLanes, Index)) {
        // Give up if the inserted range would run past the source lanes.
        if (Index + SubLanes > SrcLanes)
          return Kind;
        SubTy = FixedVectorType::get(SrcTy->getElementType(), SubLanes);
        return TTI::SK_InsertSubvector;
      }

      if (ShuffleVectorInst::isSelectMask(Mask, SrcLanes))
        return TTI::SK_Select;
      if (ShuffleVectorInst::isTransposeMask(Mask, SrcLanes))
        return TTI::SK_Transpose;
      if (ShuffleVectorInst::isSpliceMask(Mask, SrcLanes, Index))
        return TTI::SK_Splice;
      break;
    }
    // These kinds are already specific; nothing to improve.
    case TTI::SK_Select:
    case TTI::SK_Reverse:
    case TTI::SK_Broadcast:
    case TTI::SK_Transpose:
    case TTI::SK_InsertSubvector:
    case TTI::SK_ExtractSubvector:
    case TTI::SK_Splice:
      break;
    }

    return Kind;
  }


  /// Estimate the cost of a shuffle of kind \p Kind.
  ///
  /// The kind is first refined from \p Mask (which may update \p Index and
  /// \p SubTp). Broadcasts and all permute-like kinds use the corresponding
  /// overhead helper for fixed-width sources and are invalid otherwise;
  /// subvector extracts and inserts use their dedicated overhead helpers.
  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override {
    const TTI::ShuffleKind Refined =
        improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp);

    switch (Refined) {
    case TTI::SK_Broadcast: {
      auto *FixedSrc = dyn_cast<FixedVectorType>(SrcTy);
      if (!FixedSrc)
        return InstructionCost::getInvalid();
      return getBroadcastShuffleOverhead(FixedSrc, CostKind);
    }
    case TTI::SK_Select:
    case TTI::SK_Splice:
    case TTI::SK_Reverse:
    case TTI::SK_Transpose:
    case TTI::SK_PermuteSingleSrc:
    case TTI::SK_PermuteTwoSrc: {
      auto *FixedSrc = dyn_cast<FixedVectorType>(SrcTy);
      if (!FixedSrc)
        return InstructionCost::getInvalid();
      return getPermuteShuffleOverhead(FixedSrc, CostKind);
    }
    case TTI::SK_ExtractSubvector: {
      auto *FixedSub = cast<FixedVectorType>(SubTp);
      return getExtractSubvectorOverhead(SrcTy, CostKind, Index, FixedSub);
    }
    case TTI::SK_InsertSubvector: {
      auto *FixedSub = cast<FixedVectorType>(SubTp);
      return getInsertSubvectorOverhead(DstTy, CostKind, Index, FixedSub);
    }
    }
    llvm_unreachable("Unknown TTI::ShuffleKind");
  }


  /// Estimate the cost of the cast \p Opcode from \p Src to \p Dst.
  ///
  /// The estimate proceeds in stages:
  ///  1. If BaseT already reports the cast as free, return 0.
  ///  2. Per-opcode checks for conversions the target lowering reports as
  ///     free (truncates, same-size bitcasts, free extensions, extending
  ///     loads, free address-space casts) return 0.
  ///  3. If both types legalize with the same factor and the operation is
  ///     legal or promoted, return that factor.
  ///  4. Scalar casts cost 1 unless the operation is expanded (then 4).
  ///  5. Vector-to-vector casts are costed by same-size rules, by recursing
  ///     on the half-width types when legalization splits, or by assuming
  ///     scalarization.
  ///  6. Bitcasts between a vector and a scalar are costed as scalarization
  ///     of the vector side(s).
  ///
  /// \param CCH Context hint; the extending-load check runs only for
  ///            TTI::CastContextHint::Normal.
  /// \param I   Optional cast instruction; used for isExtFree queries and to
  ///            inspect operand 0 for a load or masked load.
  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override {
    // Anything BaseT already considers free stays free.
    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
      return 0;

    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);

    // Sizes of the *legalized* types, not of Src/Dst themselves.
    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    // Per-opcode checks for conversions that are free on this target.
    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // Check for NOOP conversions.
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      // A non-free truncate is additionally subject to the bitcast check.
      [[fallthrough]];
    case Instruction::BitCast:
      // Bitcast between types that are legalized to the same type are free and
      // assume int to/from ptr of the same size is also free.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      // A non-free zext is additionally subject to the checks shared with
      // sext.
      [[fallthrough]];
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;

      // If this is a zext/sext of a load, return 0 if the corresponding
      // extending load exists on target and the result type is legal.
      if (CCH == TTI::CastContextHint::Normal) {
        EVT ExtVT = EVT::getEVT(Dst);
        EVT LoadVT = EVT::getEVT(Src);
        unsigned LType =
            Opcode == Instruction::ZExt ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
        if (I) {
          if (auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) {
            if (DstLT.first == SrcLT.first &&
                TLI->isLoadLegal(ExtVT, LoadVT, LI->getAlign(),
                                 LI->getPointerAddressSpace(), LType, false))
              return 0;
          } else if (auto *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
            switch (II->getIntrinsicID()) {
            case Intrinsic::masked_load: {
              // Argument 0 of the masked load is the pointer operand.
              Type *PtrType = II->getArgOperand(0)->getType();
              assert(PtrType->isPointerTy());

              if (DstLT.first == SrcLT.first &&
                  TLI->isLoadLegal(
                      ExtVT, LoadVT, II->getParamAlign(0).valueOrOne(),
                      PtrType->getPointerAddressSpace(), LType, false))
                return 0;

              break;
            }
            default:
              break;
            }
          }
        }
      }
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    // Null when the corresponding side of the cast is scalar.
    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Handle scalar conversions.
    if (!SrcVTy && !DstVTy) {
      // Just check the op cost. If the operation is legal then assume it costs
      // 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instruction are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (DstVTy && SrcVTy) {
      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are legalizing by splitting, query the concrete TTI for the cost
      // of casting the original vector twice. We also need to factor in the
      // cost of the split itself. Count that as 1, to be consistent with
      // getTypeLegalizationCost().
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
      bool SplitDst =
          TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
          TargetLowering::TypeSplitVector;
      // Halving is only attempted when both element counts are known even.
      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isKnownEven() &&
          DstVTy->getElementCount().isKnownEven()) {
        Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
        Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
        const T *TTI = thisT();
        // If both types need to be split then the split is free.
        InstructionCost SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        return SplitCost +
               (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
                                          CostKind, I));
      }

      // Scalarization cost is Invalid, can't assume any num elements.
      if (isa<ScalableVectorType>(DstVTy))
        return InstructionCost::getInvalid();

      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(DstVTy, /*Insert*/ true, /*Extract*/ true,
                                      CostKind) +
             Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcast between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast) {
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (SrcVTy ? getScalarizationOverhead(SrcVTy, /*Insert*/ false,
                                                /*Extract*/ true, CostKind)
                     : 0) +
             (DstVTy ? getScalarizationOverhead(DstVTy, /*Insert*/ true,
                                                /*Extract*/ false, CostKind)
                     : 0);
    }

    // Any other cast mixing a vector and a scalar type is not expected here.
    llvm_unreachable("Unhandled cast");
  }


  /// Cost of extracting lane \p Index from \p VecTy and then applying the
  /// cast \p Opcode to \p Dst: the sum of the extractelement cost and the
  /// scalar cast cost.
  InstructionCost
  getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                           unsigned Index,
                           TTI::TargetCostKind CostKind) const override {
    const InstructionCost ExtractCost = thisT()->getVectorInstrCost(
        Instruction::ExtractElement, VecTy, CostKind, Index, nullptr, nullptr);
    const InstructionCost ExtendCost = thisT()->getCastInstrCost(
        Opcode, Dst, VecTy->getElementType(), TTI::CastContextHint::None,
        CostKind);
    return ExtractCost + ExtendCost;
  }


  /// Control-flow instruction cost; not refined at this level, so BaseT
  /// answers the query.
  InstructionCost
  getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                 const Instruction *I = nullptr) const override {
    InstructionCost Cost = BaseT::getCFInstrCost(Opcode, CostKind, I);
    return Cost;
  }


  /// Estimate the cost of the compare or select \p Opcode on \p ValTy.
  ///
  /// Types with no value type (MVT::Other) are answered by BaseT. A select
  /// with a vector condition is costed as ISD::VSELECT. If the operation is
  /// not expanded for the legalized type (and a vector type did not legalize
  /// to a scalar), the cost is the type-legalization factor. Otherwise a
  /// fixed-vector operation is costed as scalarized, a scalable-vector
  /// operation is invalid, and a scalar operation costs 1.
  ///
  /// \param CondTy Condition type. It must be non-null for selects (asserted
  ///               below); for other opcodes a null \p CondTy is tolerated
  ///               and forwarded unchanged to the scalarized query.
  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    if (getTLI()->getValueType(DL, ValTy, true) == MVT::Other)
      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                       Op1Info, Op2Info, I);

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the operation is scalarized.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      if (isa<ScalableVectorType>(ValTy))
        return InstructionCost::getInvalid();

      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();

      // CondTy is only guaranteed non-null on the select path above, so do
      // not dereference it unconditionally when scalarizing.
      Type *ScalarCondTy = CondTy ? CondTy->getScalarType() : nullptr;
      InstructionCost Cost = thisT()->getCmpSelInstrCost(
          Opcode, ValVTy->getScalarType(), ScalarCondTy, VecPred, CostKind,
          Op1Info, Op2Info, I);

      // Return the cost of multiple scalar invocation plus the cost of
      // inserting the results back into a vector.
      return getScalarizationOverhead(ValVTy, /*Insert*/ true,
                                      /*Extract*/ false, CostKind) +
             Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }


  /// Default insert/extract element cost: the register usage of the scalar
  /// element type of \p Val. The opcode, lane index and operands are not
  /// consulted at this level.
  InstructionCost
  getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind,
                     unsigned Index, const Value *Op0, const Value *Op1,
                     TTI::VectorInstrContext VIC =
                         TTI::VectorInstrContext::None) const override {
    Type *EltTy = Val->getScalarType();
    return getRegUsageForType(EltTy);
  }


  /// Overload taking extract-user information; at this level that information
  /// is ignored and the opcode-based overload is queried with no operands.
  ///
  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if user is not known before vectorization)
  /// and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCost(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      Value *Scalar,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
      TTI::VectorInstrContext VIC =
          TTI::VectorInstrContext::None) const override {
    InstructionCost Cost = getVectorInstrCost(Opcode, Val, CostKind, Index,
                                              nullptr, nullptr, VIC);
    return Cost;
  }


  /// Instruction-based overload: derives the operands (for insertelement) and,
  /// when \p VIC is None, the context hint from \p I, then queries the
  /// opcode-based overload through thisT().
  InstructionCost
  getVectorInstrCost(const Instruction &I, Type *Val,
                     TTI::TargetCostKind CostKind, unsigned Index,
                     TTI::VectorInstrContext VIC =
                         TTI::VectorInstrContext::None) const override {
    // Only insertelement contributes operands; everything else passes null.
    const auto *Insert = dyn_cast<InsertElementInst>(&I);
    Value *VecOperand = Insert ? Insert->getOperand(0) : nullptr;
    Value *EltOperand = Insert ? Insert->getOperand(1) : nullptr;

    // If VIC is None, compute it from the instruction
    if (VIC == TTI::VectorInstrContext::None)
      VIC = TTI::getVectorInstrContextHint(&I);

    return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index,
                                       VecOperand, EltOperand, VIC);
  }


  /// Cost of an insert/extract whose lane \p Index is counted from the end of
  /// the vector. For fixed-width vectors the index is translated to a
  /// from-the-start index; otherwise the all-ones index is passed on.
  InstructionCost
  getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val,
                                   TTI::TargetCostKind CostKind,
                                   unsigned Index) const override {
    unsigned IndexFromStart = ~0u;
    if (auto *FVTy = dyn_cast<FixedVectorType>(Val)) {
      assert(Index < FVTy->getNumElements() &&
             "Unexpected index from end of vector");
      const unsigned LastLane = FVTy->getNumElements() - 1;
      IndexFromStart = LastLane - Index;
    }

    return thisT()->getVectorInstrCost(Opcode, Val, CostKind, IndexFromStart,
                                       nullptr, nullptr);
  }


  /// Cost of a replication shuffle that repeats each of \p VF elements of
  /// type \p EltTy \p ReplicationFactor times, restricted to the destination
  /// lanes in \p DemandedDstElts.
  ///
  /// The cost is modelled as extracting the demanded source elements and
  /// inserting each demanded destination element into the wide vector.
  ///
  /// E.g. an interleaved group with factor 3:
  ///    %mask = icmp ult <8 x i32> %vec1, %vec2
  ///    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
  ///        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
  /// is estimated as extracting all mask elements from the <8 x i1> vector
  /// and inserting them factor times into the <24 x i1> shuffled vector.
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) const override {
    assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
           "Unexpected size of DemandedDstElts.");

    auto *NarrowTy = FixedVectorType::get(EltTy, VF);
    auto *WideTy = FixedVectorType::get(EltTy, VF * ReplicationFactor);

    // Source lanes are derived by scaling the destination mask down to VF
    // bits.
    const APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);

    InstructionCost Total;
    Total += thisT()->getScalarizationOverhead(NarrowTy, DemandedSrcElts,
                                               /*Insert*/ false,
                                               /*Extract*/ true, CostKind);
    Total += thisT()->getScalarizationOverhead(WideTy, DemandedDstElts,
                                               /*Insert*/ true,
                                               /*Extract*/ false, CostKind);
    return Total;
  }


  /// Estimate the cost of the load or store \p Opcode of type \p Src.
  ///
  /// Types with no value type (MVT::Other, e.g. structs) cost 4, as do loads
  /// under TTI::TCK_Latency. Otherwise the base cost is the type-legalization
  /// factor. For TTI::TCK_RecipThroughput, a vector whose store size is known
  /// to be smaller than its legalized type additionally pays scalarization
  /// overhead unless the matching truncating store / extending load is Legal
  /// or Custom.
  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override {
    assert(!Src->isVoidTy() && "Invalid type");

    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;

    const std::pair<InstructionCost, MVT> Legalized =
        getTypeLegalizationCost(Src);
    const MVT LegalVT = Legalized.second;

    // FIXME: Arbitrary cost
    const bool IsStore = Opcode == Instruction::Store;
    if (Opcode == Instruction::Load && CostKind == TTI::TCK_Latency)
      return 4;

    // Assuming that all loads of legal types cost 1.
    InstructionCost Cost = Legalized.first;
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost;

    const DataLayout &Layout = this->getDataLayout();

    // In practice it's not currently possible to have a change in lane
    // length for extending loads or truncating stores so both types should
    // have the same scalable property.
    if (!Src->isVectorTy() ||
        !TypeSize::isKnownLT(Layout.getTypeStoreSizeInBits(Src),
                             LegalVT.getSizeInBits()))
      return Cost;

    // This is a vector load/store that legalizes to a larger type than the
    // vector itself. Unless the corresponding extending load or truncating
    // store is legal, then this will scalarize.
    TargetLowering::LegalizeAction Action = TargetLowering::Expand;
    EVT MemVT = getTLI()->getValueType(Layout, Src);
    if (IsStore)
      Action = getTLI()->getTruncStoreAction(LegalVT, MemVT, Alignment,
                                             AddressSpace);
    else
      Action = getTLI()->getLoadAction(LegalVT, MemVT, Alignment, AddressSpace,
                                       ISD::EXTLOAD, false);

    if (Action == TargetLowering::Legal || Action == TargetLowering::Custom)
      return Cost;

    // This is a vector load/store for some illegal type that is scalarized.
    // We must account for the cost of building or decomposing the vector.
    Cost += getScalarizationOverhead(cast<VectorType>(Src),
                                     /*Insert*/ !IsStore,
                                     /*Extract*/ IsStore, CostKind);
    return Cost;
  }


  /// Estimate the cost of an interleaved load or store group.
  ///
  /// The estimate is the cost of the wide memory operation (scaled down to the
  /// legalized instructions that are actually used), plus the cost of moving
  /// the member elements between the wide vector and the per-member
  /// sub-vectors, plus, when a condition mask is in use, the cost of
  /// replicating that mask (and of And-ing it with the gaps mask).
  ///
  /// \param Opcode Instruction::Load or Instruction::Store.
  /// \param VecTy The wide vector type of the whole interleave group.
  /// \param Factor The interleave factor; must divide the element count.
  /// \param Indices The member indices that are present, each below \p Factor.
  /// \param Alignment Alignment of the wide access.
  /// \param AddressSpace Address space of the wide access.
  /// \param CostKind The kind of cost being requested.
  /// \param UseMaskForCond Whether the access is guarded by a condition mask.
  /// \param UseMaskForGaps Whether a mask is used to skip gaps in the group.
  /// \returns The estimated cost, or an invalid cost for scalable vectors.
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override {
    // Scalable vectors cannot be scalarized, so there is nothing to model.
    if (isa<ScalableVectorType>(VecTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(VecTy);
    const unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    const unsigned NumSubElts = NumElts / Factor;
    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);
    const bool IsLoad = Opcode == Instruction::Load;

    // Step 1: the wide load/store itself, masked if any mask is requested.
    InstructionCost Cost;
    if (!UseMaskForCond && !UseMaskForGaps) {
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);
    } else {
      unsigned IID = IsLoad ? Intrinsic::masked_load : Intrinsic::masked_store;
      Cost = thisT()->getMemIntrinsicInstrCost(
          MemIntrinsicCostAttributes(IID, VecTy, Alignment, AddressSpace),
          CostKind);
    }

    // Step 2: compare the store sizes of the unlegalized and legalized types.
    const MVT LegalVT = getTypeLegalizationCost(VecTy).second;
    const unsigned WideStoreSize =
        thisT()->getDataLayout().getTypeStoreSize(VecTy);
    const unsigned LegalStoreSize = LegalVT.getStoreSize();

    // Scale the memory operation cost by the fraction of legalized
    // instructions that are really needed; dead ones will be removed and
    // should not be charged.
    //
    // E.g., An interleaved load of factor 8:
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
    //       %v0 = shufflevector %vec, undef, <0, 8>
    //
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of them are used
    // (the ones covering elements [0:1] and [8:9] of the unlegalized type);
    // the remaining loads are unused.
    //
    // TODO: Note that legalization can turn masked loads/stores into unmasked
    // (legalized) loads/stores. This can be reflected in the cost.
    if (Cost.isValid() && LegalStoreSize < WideStoreSize) {
      // How many legal-typed operations represent one unlegalized operation.
      const unsigned NumLegalInsts = divideCeil(WideStoreSize, LegalStoreSize);

      // How many elements of the unlegalized type each legal operation covers.
      const unsigned EltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);

      // Mark every legal operation touched by a present member.
      BitVector LiveInsts(NumLegalInsts, false);
      for (unsigned Member : Indices)
        for (unsigned Lane = 0; Lane != NumSubElts; ++Lane)
          LiveInsts.set((Member + Lane * Factor) / EltsPerLegalInst);

      // Keep only the share of the cost belonging to the live operations.
      Cost = divideCeil(LiveInsts.count() * Cost.getValue(), NumLegalInsts);
    }

    // Step 3: the interleave (shuffle) cost.
    assert(Indices.size() <= Factor &&
           "Interleaved memory op has too many members");

    const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts);
    const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts);

    // Elements of the wide vector that belong to a present member.
    APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
    for (unsigned Index : Indices) {
      assert(Index < Factor && "Invalid index for interleaved memory op");
      for (unsigned Lane = 0; Lane != NumSubElts; ++Lane)
        DemandedLoadStoreElts.setBit(Index + Lane * Factor);
    }

    // Loads: extract the member elements from the wide vector and insert them
    // into the sub-vectors.
    //
    // E.g. An interleaved load of factor 2 (with one member of index 0):
    //      %vec = load <8 x i32>, <8 x i32>* %ptr
    //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
    // The cost is estimated as extract elements at 0, 2, 4, 6 from the
    // <8 x i32> vector and insert them into a <4 x i32> vector.
    //
    // Stores: extract the elements from the sub-vectors and insert them into
    // the wide vector.
    //
    // E.g. An interleaved store of factor 3 with 2 members at indices 0,1:
    // (using VF=4):
    //    %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef>
    //    %gaps.mask = <true, true, false, true, true, false,
    //                  true, true, false, true, true, false>
    //    call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr,
    //                           i32 Align, <12 x i1> %gaps.mask
    // The cost is estimated as extract all elements (of actual members,
    // excluding gaps) from both <4 x i32> vectors and insert into the <12 x
    // i32> vector.
    //
    // Both directions are the same pair of queries with the Insert/Extract
    // roles of the sub-vector and the wide vector swapped.
    InstructionCost PerMemberCost = thisT()->getScalarizationOverhead(
        SubVT, DemandedAllSubElts,
        /*Insert*/ IsLoad, /*Extract*/ !IsLoad, CostKind);
    Cost += PerMemberCost * Indices.size();
    Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                              /*Insert*/ !IsLoad,
                                              /*Extract*/ IsLoad, CostKind);

    // Without a condition mask there is no per-iteration mask work to add.
    if (!UseMaskForCond)
      return Cost;

    // Step 4: replicate the condition mask across the interleave factor.
    Type *I8Type = Type::getInt8Ty(VT->getContext());
    const APInt &DemandedMaskElts =
        UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts;
    Cost += thisT()->getReplicationShuffleCost(I8Type, Factor, NumSubElts,
                                               DemandedMaskElts, CostKind);

    // The gaps mask is loop invariant and built outside the loop, so creating
    // it is not charged here. When it is combined with another mask guarding
    // the access, however, the And of the two masks happens inside the loop
    // and must be paid for.
    if (!UseMaskForGaps)
      return Cost;

    auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
    Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                            CostKind);
    return Cost;
  }


  /// Get intrinsic cost based on arguments.
  ///
  /// The query is resolved in the following order:
  ///  1. Intrinsics the base implementation reports as free cost 0.
  ///  2. Target intrinsics are assumed to cost TCC_Basic.
  ///  3. VP intrinsics are costed like their non-VP counterpart (a plain
  ///     instruction, a memory intrinsic, or the functional intrinsic).
  ///  4. Type-only queries are forwarded to getTypeBasedIntrinsicInstrCost().
  ///  5. A number of intrinsics are costed from their actual arguments.
  ///  6. Everything else is costed by type, with a scalarization overhead
  ///     computed from the arguments for fixed-width vector results.
  ///
  /// \param ICA Describes the intrinsic: ID, return type, argument types and,
  ///        optionally, argument values and the underlying call instruction.
  ///        The instruction may be absent even when argument values are
  ///        present; in that case parameter alignment attributes fall back to
  ///        their defaults and powi is costed as if not optimizing for size.
  /// \param CostKind The kind of cost being requested.
  /// \returns The estimated cost of the intrinsic call.
  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override {
    // Check for generically free intrinsics.
    if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
      return 0;

    // Assume that target intrinsics are cheap.
    Intrinsic::ID IID = ICA.getID();
    if (Intrinsic::isTargetIntrinsic(IID))
      return TargetTransformInfo::TCC_Basic;

    // VP Intrinsics should have the same cost as their non-vp counterpart.
    // TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
    // counterpart when the vector length argument is smaller than the maximum
    // vector length.
    // TODO: Support other kinds of VPIntrinsics
    if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
      std::optional<unsigned> FOp =
          VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
      if (FOp) {
        if (ICA.getID() == Intrinsic::vp_load) {
          // Alignment stays at its default unless an instruction is attached.
          Align Alignment;
          if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
            Alignment = VPI->getPointerAlignment().valueOrOne();
          unsigned AS = 0;
          if (ICA.getArgTypes().size() > 1)
            if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[0]))
              AS = PtrTy->getAddressSpace();
          return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
                                          AS, CostKind);
        }
        if (ICA.getID() == Intrinsic::vp_store) {
          Align Alignment;
          if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
            Alignment = VPI->getPointerAlignment().valueOrOne();
          unsigned AS = 0;
          if (ICA.getArgTypes().size() >= 2)
            if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[1]))
              AS = PtrTy->getAddressSpace();
          return thisT()->getMemoryOpCost(*FOp, ICA.getArgTypes()[0], Alignment,
                                          AS, CostKind);
        }
        if (VPBinOpIntrinsic::isVPBinOp(ICA.getID()) ||
            ICA.getID() == Intrinsic::vp_fneg) {
          return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
                                                 CostKind);
        }
        if (VPCastIntrinsic::isVPCast(ICA.getID())) {
          return thisT()->getCastInstrCost(
              *FOp, ICA.getReturnType(), ICA.getArgTypes()[0],
              TTI::CastContextHint::None, CostKind);
        }
        if (VPCmpIntrinsic::isVPCmp(ICA.getID())) {
          // We can only handle vp_cmp intrinsics with underlying instructions.
          if (ICA.getInst()) {
            assert(FOp);
            auto *UI = cast<VPCmpIntrinsic>(ICA.getInst());
            return thisT()->getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0],
                                               ICA.getReturnType(),
                                               UI->getPredicate(), CostKind);
          }
        }
      }
      if (ICA.getID() == Intrinsic::vp_load_ff) {
        // The loaded data is the first member of the returned struct.
        Type *RetTy = ICA.getReturnType();
        Type *DataTy = cast<StructType>(RetTy)->getElementType(0);
        Align Alignment;
        if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
          Alignment = VPI->getPointerAlignment().valueOrOne();
        return thisT()->getMemIntrinsicInstrCost(
            MemIntrinsicCostAttributes(ICA.getID(), DataTy, Alignment),
            CostKind);
      }
      if (ICA.getID() == Intrinsic::vp_scatter) {
        if (ICA.isTypeBasedOnly()) {
          // Cost the functional (non-VP) intrinsic without the trailing
          // vector length operand type.
          IntrinsicCostAttributes MaskedScatter(
              *VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID()),
              ICA.getReturnType(), ArrayRef(ICA.getArgTypes()).drop_back(1),
              ICA.getFlags());
          return getTypeBasedIntrinsicInstrCost(MaskedScatter, CostKind);
        }
        Align Alignment;
        if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
          Alignment = VPI->getPointerAlignment().valueOrOne();
        // The mask is variable when it is NOT a constant, matching the
        // masked_scatter handling below.
        bool VarMask = !isa<Constant>(ICA.getArgs()[2]);
        return thisT()->getMemIntrinsicInstrCost(
            MemIntrinsicCostAttributes(Intrinsic::vp_scatter,
                                       ICA.getArgTypes()[0], ICA.getArgs()[1],
                                       VarMask, Alignment, nullptr),
            CostKind);
      }
      if (ICA.getID() == Intrinsic::vp_gather) {
        if (ICA.isTypeBasedOnly()) {
          // Cost the functional (non-VP) intrinsic without the trailing
          // vector length operand type.
          IntrinsicCostAttributes MaskedGather(
              *VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID()),
              ICA.getReturnType(), ArrayRef(ICA.getArgTypes()).drop_back(1),
              ICA.getFlags());
          return getTypeBasedIntrinsicInstrCost(MaskedGather, CostKind);
        }
        Align Alignment;
        if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
          Alignment = VPI->getPointerAlignment().valueOrOne();
        // The mask is variable when it is NOT a constant, matching the
        // masked_gather handling below.
        bool VarMask = !isa<Constant>(ICA.getArgs()[1]);
        return thisT()->getMemIntrinsicInstrCost(
            MemIntrinsicCostAttributes(Intrinsic::vp_gather,
                                       ICA.getReturnType(), ICA.getArgs()[0],
                                       VarMask, Alignment, nullptr),
            CostKind);
      }

      if (ICA.getID() == Intrinsic::vp_select ||
          ICA.getID() == Intrinsic::vp_merge) {
        // Costed as a select; operand info is only available when the actual
        // argument values were supplied.
        TTI::OperandValueInfo OpInfoX, OpInfoY;
        if (!ICA.isTypeBasedOnly()) {
          OpInfoX = TTI::getOperandInfo(ICA.getArgs()[0]);
          OpInfoY = TTI::getOperandInfo(ICA.getArgs()[1]);
        }
        return getCmpSelInstrCost(
            Instruction::Select, ICA.getReturnType(), ICA.getArgTypes()[0],
            CmpInst::BAD_ICMP_PREDICATE, CostKind, OpInfoX, OpInfoY);
      }

      std::optional<Intrinsic::ID> FID =
          VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());

      // Not functionally equivalent but close enough for cost modelling.
      if (ICA.getID() == Intrinsic::experimental_vp_reverse)
        FID = Intrinsic::vector_reverse;

      if (FID) {
        // Non-vp version will have same arg types except mask and vector
        // length.
        assert(ICA.getArgTypes().size() >= 2 &&
               "Expected VPIntrinsic to have Mask and Vector Length args and "
               "types");

        ArrayRef<const Value *> NewArgs = ArrayRef(ICA.getArgs());
        if (!ICA.isTypeBasedOnly())
          NewArgs = NewArgs.drop_back(2);
        ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);

        // VPReduction intrinsics have a start value argument that their non-vp
        // counterparts do not have, except for the fadd and fmul non-vp
        // counterpart.
        if (VPReductionIntrinsic::isVPReduction(ICA.getID()) &&
            *FID != Intrinsic::vector_reduce_fadd &&
            *FID != Intrinsic::vector_reduce_fmul) {
          if (!ICA.isTypeBasedOnly())
            NewArgs = NewArgs.drop_front();
          NewTys = NewTys.drop_front();
        }

        IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewArgs,
                                       NewTys, ICA.getFlags());
        return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
      }
    }

    // Everything below inspects argument values, so type-only queries are
    // handed to the type-based implementation.
    if (ICA.isTypeBasedOnly())
      return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

    Type *RetTy = ICA.getReturnType();

    ElementCount RetVF = isVectorizedTy(RetTy) ? getVectorizedTypeVF(RetTy)
                                               : ElementCount::getFixed(1);

    const IntrinsicInst *I = ICA.getInst();
    const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
    FastMathFlags FMF = ICA.getFlags();

    // Argument values can be supplied without an underlying call instruction.
    // In that case no parameter alignment attribute can be read, so use the
    // given fallback instead of dereferencing a null instruction.
    auto GetParamAlignOr = [I](unsigned ArgNo, Align Fallback) {
      return I ? I->getParamAlign(ArgNo).value_or(Fallback) : Fallback;
    };

    switch (IID) {
    default:
      break;

    case Intrinsic::powi:
      if (auto *RHSC = dyn_cast<ConstantInt>(Args[1])) {
        // Without an instruction the enclosing function is unknown; assume we
        // are not optimizing for size.
        bool ShouldOptForSize =
            I && I->getParent()->getParent()->hasOptSize();
        if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(),
                                               ShouldOptForSize)) {
          // The cost is modeled on the expansion performed by ExpandPowI in
          // SelectionDAGBuilder.
          APInt Exponent = RHSC->getValue().abs();
          unsigned ActiveBits = Exponent.getActiveBits();
          unsigned PopCount = Exponent.popcount();
          InstructionCost Cost = (ActiveBits + PopCount - 2) *
                                 thisT()->getArithmeticInstrCost(
                                     Instruction::FMul, RetTy, CostKind);
          // A negative exponent additionally needs a division.
          if (RHSC->isNegative())
            Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
                                                    CostKind);
          return Cost;
        }
      }
      break;
    case Intrinsic::cttz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz(RetTy))
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::ctlz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz(RetTy))
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::memcpy:
      return thisT()->getMemcpyCost(ICA.getInst());

    case Intrinsic::masked_scatter: {
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = GetParamAlignOr(1, Align());
      return thisT()->getMemIntrinsicInstrCost(
          MemIntrinsicCostAttributes(Intrinsic::masked_scatter,
                                     ICA.getArgTypes()[0], Args[1], VarMask,
                                     Alignment, I),
          CostKind);
    }
    case Intrinsic::masked_gather: {
      const Value *Mask = Args[1];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = GetParamAlignOr(0, Align());
      return thisT()->getMemIntrinsicInstrCost(
          MemIntrinsicCostAttributes(Intrinsic::masked_gather, RetTy, Args[0],
                                     VarMask, Alignment, I),
          CostKind);
    }
    case Intrinsic::masked_compressstore: {
      const Value *Data = Args[0];
      const Value *Mask = Args[2];
      Align Alignment = GetParamAlignOr(1, Align());
      return thisT()->getMemIntrinsicInstrCost(
          MemIntrinsicCostAttributes(IID, Data->getType(), !isa<Constant>(Mask),
                                     Alignment, I),
          CostKind);
    }
    case Intrinsic::masked_expandload: {
      const Value *Mask = Args[1];
      Align Alignment = GetParamAlignOr(0, Align());
      return thisT()->getMemIntrinsicInstrCost(
          MemIntrinsicCostAttributes(IID, RetTy, !isa<Constant>(Mask),
                                     Alignment, I),
          CostKind);
    }
    case Intrinsic::experimental_vp_strided_store: {
      const Value *Data = Args[0];
      const Value *Ptr = Args[1];
      const Value *Mask = Args[3];
      const Value *EVL = Args[4];
      // Either a non-constant mask or a non-constant vector length makes the
      // effective mask variable.
      bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
      Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
      Align Alignment =
          GetParamAlignOr(1, thisT()->DL.getABITypeAlign(EltTy));
      return thisT()->getMemIntrinsicInstrCost(
          MemIntrinsicCostAttributes(IID, Data->getType(), Ptr, VarMask,
                                     Alignment, I),
          CostKind);
    }
    case Intrinsic::experimental_vp_strided_load: {
      const Value *Ptr = Args[0];
      const Value *Mask = Args[2];
      const Value *EVL = Args[3];
      // Either a non-constant mask or a non-constant vector length makes the
      // effective mask variable.
      bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
      Type *EltTy = cast<VectorType>(RetTy)->getElementType();
      Align Alignment =
          GetParamAlignOr(0, thisT()->DL.getABITypeAlign(EltTy));
      return thisT()->getMemIntrinsicInstrCost(
          MemIntrinsicCostAttributes(IID, RetTy, Ptr, VarMask, Alignment, I),
          CostKind);
    }
    case Intrinsic::stepvector: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      // The cost of materialising a constant integer vector.
      return TargetTransformInfo::TCC_Basic;
    }
    case Intrinsic::vector_extract: {
      // FIXME: Handle case where a scalable vector is extracted from a scalable
      // vector
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
                                     cast<VectorType>(RetTy),
                                     cast<VectorType>(Args[0]->getType()), {},
                                     CostKind, Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_insert: {
      // FIXME: Handle case where a scalable vector is inserted into a scalable
      // vector
      if (isa<ScalableVectorType>(Args[1]->getType()))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_InsertSubvector, cast<VectorType>(RetTy),
          cast<VectorType>(Args[0]->getType()), {}, CostKind, Index,
          cast<VectorType>(Args[1]->getType()));
    }
    case Intrinsic::vector_splice_left:
    case Intrinsic::vector_splice_right: {
      // Only a constant offset can be costed as a splice shuffle; otherwise
      // fall through to the generic handling after the switch.
      auto *COffset = dyn_cast<ConstantInt>(Args[2]);
      if (!COffset)
        break;
      unsigned Index = COffset->getZExtValue();
      // A right splice is passed to the shuffle cost query as a negated index.
      return thisT()->getShuffleCost(
          TTI::SK_Splice, cast<VectorType>(RetTy),
          cast<VectorType>(Args[0]->getType()), {}, CostKind,
          IID == Intrinsic::vector_splice_left ? Index : -Index,
          cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_fmaximum:
    case Intrinsic::vector_reduce_fminimum:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      // These reductions carry a start value in addition to the vector.
      IntrinsicCostAttributes Attrs(
          IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      const Value *X = Args[0];
      const Value *Y = Args[1];
      const Value *Z = Args[2];
      const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X);
      const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y);
      const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z);

      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
      InstructionCost Cost = 0;
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
          {OpInfoZ.Kind, TTI::OP_None});
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
          {OpInfoZ.Kind, TTI::OP_None});

      if (!OpInfoZ.isConstant()) {
        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
                                                CostKind);
        // Non-constant shift amounts requires a modulo. If the typesize is a
        // power-2 then this will be converted to an and, otherwise it will use
        // a urem.
        Cost += thisT()->getArithmeticInstrCost(
            isPowerOf2_32(RetTy->getScalarSizeInBits()) ? BinaryOperator::And
                                                        : BinaryOperator::URem,
            RetTy, CostKind, OpInfoZ,
            {TTI::OK_UniformConstantValue, TTI::OP_None});
        // For non-rotates (X != Y) we must add shift-by-zero handling costs.
        if (X != Y) {
          Type *CondTy = RetTy->getWithNewBitWidth(1);
          Cost += thisT()->getCmpSelInstrCost(
              BinaryOperator::ICmp, RetTy, CondTy, CmpInst::ICMP_EQ, CostKind);
          Cost +=
              thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                          CmpInst::ICMP_EQ, CostKind);
        }
      }
      return Cost;
    }
    case Intrinsic::experimental_cttz_elts: {
      EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);

      // If we're not expanding the intrinsic then we assume this is cheap
      // to implement.
      if (!getTLI()->shouldExpandCttzElements(ArgType))
        return getTypeLegalizationCost(RetTy).first;

      // TODO: The costs below reflect the expansion code in
      // SelectionDAGBuilder, but we may want to sacrifice some accuracy in
      // favour of compile time.

      // Find the smallest "sensible" element type to use for the expansion.
      bool ZeroIsPoison = !cast<ConstantInt>(Args[1])->isZero();
      ConstantRange VScaleRange(APInt(64, 1), APInt::getZero(64));
      if (isa<ScalableVectorType>(ICA.getArgTypes()[0]) && I && I->getCaller())
        VScaleRange = getVScaleRange(I->getCaller(), 64);

      unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
          getTLI()->getValueType(DL, RetTy), ArgType.getVectorElementCount(),
          ZeroIsPoison, &VScaleRange);
      Type *NewEltTy = IntegerType::getIntNTy(RetTy->getContext(), EltWidth);

      // Create the new vector type & get the vector length
      Type *NewVecTy = VectorType::get(
          NewEltTy, cast<VectorType>(Args[0]->getType())->getElementCount());

      // Sum the costs of the individual steps: stepvector, sub, sext, and,
      // umax reduction and a final scalar sub.
      IntrinsicCostAttributes StepVecAttrs(Intrinsic::stepvector, NewVecTy, {},
                                           FMF);
      InstructionCost Cost =
          thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind);

      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind);
      Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy,
                                        Args[0]->getType(),
                                        TTI::CastContextHint::None, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind);

      IntrinsicCostAttributes ReducAttrs(Intrinsic::vector_reduce_umax,
                                         NewEltTy, NewVecTy, FMF, I, 1);
      Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind);

      return Cost;
    }
    case Intrinsic::get_active_lane_mask:
    case Intrinsic::experimental_vector_match:
    case Intrinsic::experimental_vector_histogram_add:
    case Intrinsic::experimental_vector_histogram_uadd_sat:
    case Intrinsic::experimental_vector_histogram_umax:
    case Intrinsic::experimental_vector_histogram_umin:
    case Intrinsic::masked_udiv:
    case Intrinsic::masked_sdiv:
    case Intrinsic::masked_urem:
    case Intrinsic::masked_srem:
      return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
    case Intrinsic::modf:
    case Intrinsic::sincos:
    case Intrinsic::sincospi: {
      std::optional<unsigned> CallRetElementIndex;
      // The first element of the modf result is returned by value in the
      // libcall.
      if (ICA.getID() == Intrinsic::modf)
        CallRetElementIndex = 0;

      if (auto Cost = getMultipleResultIntrinsicVectorLibCallCost(
              ICA, CostKind, CallRetElementIndex))
        return *Cost;
      // Otherwise, fallback to default scalarization cost.
      break;
    }
    case Intrinsic::loop_dependence_war_mask:
    case Intrinsic::loop_dependence_raw_mask: {
      // Compute the cost of the expanded version of these intrinsics:
      //
      // The possible expansions are...
      //
      // loop_dependence_war_mask:
      //   diff = (addrB - addrA) / eltSize
      //   cmp = icmp sle diff, 0
      //   upper_bound = select cmp, -1, diff
      //   mask = get_active_lane_mask 0, upper_bound
      //
      // loop_dependence_raw_mask:
      //   diff = (abs(addrB - addrA)) / eltSize
      //   cmp = icmp eq diff, 0
      //   upper_bound = select cmp, -1, diff
      //   mask = get_active_lane_mask 0, upper_bound
      //
      Type *AddrTy = ICA.getArgTypes()[0];
      bool IsReadAfterWrite = IID == Intrinsic::loop_dependence_raw_mask;

      InstructionCost Cost =
          thisT()->getArithmeticInstrCost(Instruction::Sub, AddrTy, CostKind);
      if (IsReadAfterWrite) {
        IntrinsicCostAttributes AbsAttrs(Intrinsic::abs, AddrTy, {AddrTy}, {});
        Cost += thisT()->getIntrinsicInstrCost(AbsAttrs, CostKind);
      }

      TTI::OperandValueInfo EltSizeOpInfo =
          TTI::getOperandInfo(ICA.getArgs()[2]);
      Cost += thisT()->getArithmeticInstrCost(Instruction::SDiv, AddrTy,
                                              CostKind, {}, EltSizeOpInfo);

      Type *CondTy = IntegerType::getInt1Ty(RetTy->getContext());
      CmpInst::Predicate Pred =
          IsReadAfterWrite ? CmpInst::ICMP_EQ : CmpInst::ICMP_SLE;
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CondTy, AddrTy,
                                          Pred, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, AddrTy,
                                          CondTy, Pred, CostKind);

      IntrinsicCostAttributes Attrs(Intrinsic::get_active_lane_mask, RetTy,
                                    {AddrTy, AddrTy}, FMF);
      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
      return Cost;
    }
    }

    // Assume that we need to scalarize this intrinsic.
    // Compute the scalarization overhead based on Args for a vector
    // intrinsic. The overhead stays invalid for scalar and scalable results.
    InstructionCost ScalarizationCost = InstructionCost::getInvalid();
    if (RetVF.isVector() && !RetVF.isScalable()) {
      ScalarizationCost = 0;
      if (!RetTy->isVoidTy()) {
        // Inserting the scalar results back into each returned vector.
        for (Type *VectorTy : getContainedTypes(RetTy)) {
          ScalarizationCost += getScalarizationOverhead(
              cast<VectorType>(VectorTy),
              /*Insert=*/true, /*Extract=*/false, CostKind);
        }
      }
      ScalarizationCost += getOperandsScalarizationOverhead(
          filterConstantAndDuplicatedOperands(Args, ICA.getArgTypes()),
          CostKind);
    }

    IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
                                  ScalarizationCost);
    return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
  }


  /// Get intrinsic cost based on argument types.

  /// If ICA.skipScalarizationCost() is false, the cost of scalarizing the

  /// arguments and the return value will be computed based on types;

  /// otherwise the scalarization cost carried by ICA is used instead.

  InstructionCost


  getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

                                 TTI::TargetCostKind CostKind) const {

    Intrinsic::ID IID = ICA.getID();

    Type *RetTy = ICA.getReturnType();

    const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();

    FastMathFlags FMF = ICA.getFlags();

    InstructionCost ScalarizationCostPassed = ICA.getScalarizationCost();

    bool SkipScalarizationCost = ICA.skipScalarizationCost();


    VectorType *VecOpTy = nullptr;

    if (!Tys.empty()) {

      // The vector reduction operand is operand 0 except for fadd/fmul.

      // Their operand 0 is a scalar start value, so the vector op is operand 1.

      unsigned VecTyIndex = 0;

      if (IID == Intrinsic::vector_reduce_fadd ||

          IID == Intrinsic::vector_reduce_fmul)

        VecTyIndex = 1;

      assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");

      VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);

    }


    // Library call cost - other than size, make it expensive.

    unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;

    unsigned ISD = 0;

    switch (IID) {

    default: {

      // Scalable vectors cannot be scalarized, so return Invalid.

      if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {

            return isa<ScalableVectorType>(Ty);

          }))

        return InstructionCost::getInvalid();


      // Assume that we need to scalarize this intrinsic.

      InstructionCost ScalarizationCost =

          SkipScalarizationCost ? ScalarizationCostPassed : 0;

      unsigned ScalarCalls = 1;

      Type *ScalarRetTy = RetTy;

      if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {

        if (!SkipScalarizationCost)

          ScalarizationCost = getScalarizationOverhead(

              RetVTy, /*Insert*/ true, /*Extract*/ false, CostKind);

        ScalarCalls = std::max(ScalarCalls,

                               cast<FixedVectorType>(RetVTy)->getNumElements());

        ScalarRetTy = RetTy->getScalarType();

      }

      SmallVector<Type *, 4> ScalarTys;

      for (Type *Ty : Tys) {

        if (auto *VTy = dyn_cast<VectorType>(Ty)) {

          if (!SkipScalarizationCost)

            ScalarizationCost += getScalarizationOverhead(

                VTy, /*Insert*/ false, /*Extract*/ true, CostKind);

          ScalarCalls = std::max(ScalarCalls,

                                 cast<FixedVectorType>(VTy)->getNumElements());

          Ty = Ty->getScalarType();

        }

        ScalarTys.push_back(Ty);

      }

      if (ScalarCalls == 1)

        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.


      IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);

      InstructionCost ScalarCost =

          thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);


      return ScalarCalls * ScalarCost + ScalarizationCost;

    }

    // Look for intrinsics that can be lowered directly or turned into a scalar

    // intrinsic call.

    case Intrinsic::sqrt:

      ISD = ISD::FSQRT;

      break;

    case Intrinsic::sin:

      ISD = ISD::FSIN;

      break;

    case Intrinsic::cos:

      ISD = ISD::FCOS;

      break;

    case Intrinsic::sincos:

      ISD = ISD::FSINCOS;

      break;

    case Intrinsic::sincospi:

      ISD = ISD::FSINCOSPI;

      break;

    case Intrinsic::modf:

      ISD = ISD::FMODF;

      break;

    case Intrinsic::tan:

      ISD = ISD::FTAN;

      break;

    case Intrinsic::asin:

      ISD = ISD::FASIN;

      break;

    case Intrinsic::acos:

      ISD = ISD::FACOS;

      break;

    case Intrinsic::atan:

      ISD = ISD::FATAN;

      break;

    case Intrinsic::atan2:

      ISD = ISD::FATAN2;

      break;

    case Intrinsic::sinh:

      ISD = ISD::FSINH;

      break;

    case Intrinsic::cosh:

      ISD = ISD::FCOSH;

      break;

    case Intrinsic::tanh:

      ISD = ISD::FTANH;

      break;

    case Intrinsic::exp:

      ISD = ISD::FEXP;

      break;

    case Intrinsic::exp2:

      ISD = ISD::FEXP2;

      break;

    case Intrinsic::exp10:

      ISD = ISD::FEXP10;

      break;

    case Intrinsic::log:

      ISD = ISD::FLOG;

      break;

    case Intrinsic::log10:

      ISD = ISD::FLOG10;

      break;

    case Intrinsic::log2:

      ISD = ISD::FLOG2;

      break;

    case Intrinsic::ldexp:

      ISD = ISD::FLDEXP;

      break;

    case Intrinsic::fabs:

      ISD = ISD::FABS;

      break;

    case Intrinsic::canonicalize:

      ISD = ISD::FCANONICALIZE;

      break;

    case Intrinsic::minnum:

      ISD = ISD::FMINNUM;

      break;

    case Intrinsic::maxnum:

      ISD = ISD::FMAXNUM;

      break;

    case Intrinsic::minimum:

      ISD = ISD::FMINIMUM;

      break;

    case Intrinsic::maximum:

      ISD = ISD::FMAXIMUM;

      break;

    case Intrinsic::minimumnum:

      ISD = ISD::FMINIMUMNUM;

      break;

    case Intrinsic::maximumnum:

      ISD = ISD::FMAXIMUMNUM;

      break;

    case Intrinsic::copysign:

      ISD = ISD::FCOPYSIGN;

      break;

    case Intrinsic::floor:

      ISD = ISD::FFLOOR;

      break;

    case Intrinsic::ceil:

      ISD = ISD::FCEIL;

      break;

    case Intrinsic::trunc:

      ISD = ISD::FTRUNC;

      break;

    case Intrinsic::nearbyint:

      ISD = ISD::FNEARBYINT;

      break;

    case Intrinsic::rint:

      ISD = ISD::FRINT;

      break;

    case Intrinsic::lrint:

      ISD = ISD::LRINT;

      break;

    case Intrinsic::llrint:

      ISD = ISD::LLRINT;

      break;

    case Intrinsic::round:

      ISD = ISD::FROUND;

      break;

    case Intrinsic::roundeven:

      ISD = ISD::FROUNDEVEN;

      break;

    case Intrinsic::lround:

      ISD = ISD::LROUND;

      break;

    case Intrinsic::llround:

      ISD = ISD::LLROUND;

      break;

    case Intrinsic::pow:

      ISD = ISD::FPOW;

      break;

    case Intrinsic::fma:

      ISD = ISD::FMA;

      break;

    case Intrinsic::fmuladd:

      ISD = ISD::FMA;

      break;

    case Intrinsic::experimental_constrained_fmuladd:

      ISD = ISD::STRICT_FMA;

      break;

    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.

    case Intrinsic::lifetime_start:

    case Intrinsic::lifetime_end:

    case Intrinsic::sideeffect:

    case Intrinsic::pseudoprobe:

    case Intrinsic::arithmetic_fence:

      return 0;

    case Intrinsic::masked_store: {

      Type *Ty = Tys[0];

      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);

      return thisT()->getMemIntrinsicInstrCost(

          MemIntrinsicCostAttributes(IID, Ty, TyAlign, 0), CostKind);

    }

    case Intrinsic::masked_load: {

      Type *Ty = RetTy;

      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);

      return thisT()->getMemIntrinsicInstrCost(

          MemIntrinsicCostAttributes(IID, Ty, TyAlign, 0), CostKind);

    }

    case Intrinsic::experimental_vp_strided_store: {

      auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);

      Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());

      return thisT()->getMemIntrinsicInstrCost(

          MemIntrinsicCostAttributes(IID, Ty, /*Ptr=*/nullptr,

                                     /*VariableMask=*/true, Alignment,

                                     ICA.getInst()),

          CostKind);

    }

    case Intrinsic::experimental_vp_strided_load: {

      auto *Ty = cast<VectorType>(ICA.getReturnType());

      Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());

      return thisT()->getMemIntrinsicInstrCost(

          MemIntrinsicCostAttributes(IID, Ty, /*Ptr=*/nullptr,

                                     /*VariableMask=*/true, Alignment,

                                     ICA.getInst()),

          CostKind);

    }

    case Intrinsic::vector_reduce_add:

    case Intrinsic::vector_reduce_mul:

    case Intrinsic::vector_reduce_and:

    case Intrinsic::vector_reduce_or:

    case Intrinsic::vector_reduce_xor:

      return thisT()->getArithmeticReductionCost(

          getArithmeticReductionInstruction(IID), VecOpTy, std::nullopt,

          CostKind);

    case Intrinsic::vector_reduce_fadd:

    case Intrinsic::vector_reduce_fmul:

      return thisT()->getArithmeticReductionCost(

          getArithmeticReductionInstruction(IID), VecOpTy, FMF, CostKind);

    case Intrinsic::vector_reduce_smax:

    case Intrinsic::vector_reduce_smin:

    case Intrinsic::vector_reduce_umax:

    case Intrinsic::vector_reduce_umin:

    case Intrinsic::vector_reduce_fmax:

    case Intrinsic::vector_reduce_fmin:

    case Intrinsic::vector_reduce_fmaximum:

    case Intrinsic::vector_reduce_fminimum:

      return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),

                                             VecOpTy, ICA.getFlags(), CostKind);

    case Intrinsic::experimental_vector_match: {

      auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]);

      auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);

      unsigned SearchSize = NeedleTy->getNumElements();


      // If we're not expanding the intrinsic then we assume this is cheap to

      // implement.

      EVT SearchVT = getTLI()->getValueType(DL, SearchTy);

      if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))

        return getTypeLegalizationCost(RetTy).first;


      // Approximate the cost based on the expansion code in

      // SelectionDAGBuilder.

      InstructionCost Cost = 0;

      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy,

                                          CostKind, 1, nullptr, nullptr);

      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,

                                          CostKind, 0, nullptr, nullptr);

      Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, SearchTy, {},

                                      CostKind, 0, nullptr);

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,

                                          CmpInst::ICMP_EQ, CostKind);

      Cost +=

          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);

      Cost *= SearchSize;

      Cost +=

          thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);

      return Cost;

    }

    case Intrinsic::vector_reverse:

      return thisT()->getShuffleCost(TTI::SK_Reverse, cast<VectorType>(RetTy),

                                     cast<VectorType>(ICA.getArgTypes()[0]), {},

                                     CostKind, 0, cast<VectorType>(RetTy));

    case Intrinsic::experimental_vector_histogram_add:

    case Intrinsic::experimental_vector_histogram_uadd_sat:

    case Intrinsic::experimental_vector_histogram_umax:

    case Intrinsic::experimental_vector_histogram_umin: {

      FixedVectorType *PtrsTy = dyn_cast<FixedVectorType>(ICA.getArgTypes()[0]);

      Type *EltTy = ICA.getArgTypes()[1];


      // Targets with scalable vectors must handle this on their own.

      if (!PtrsTy)

        return InstructionCost::getInvalid();


      Align Alignment = thisT()->DL.getABITypeAlign(EltTy);

      InstructionCost Cost = 0;

      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, PtrsTy,

                                          CostKind, 1, nullptr, nullptr);

      Cost += thisT()->getMemoryOpCost(Instruction::Load, EltTy, Alignment, 0,

                                       CostKind);

      switch (IID) {

      default:

        llvm_unreachable("Unhandled histogram update operation.");

      case Intrinsic::experimental_vector_histogram_add:

        Cost +=

            thisT()->getArithmeticInstrCost(Instruction::Add, EltTy, CostKind);

        break;

      case Intrinsic::experimental_vector_histogram_uadd_sat: {

        IntrinsicCostAttributes UAddSat(Intrinsic::uadd_sat, EltTy, {EltTy});

        Cost += thisT()->getIntrinsicInstrCost(UAddSat, CostKind);

        break;

      }

      case Intrinsic::experimental_vector_histogram_umax: {

        IntrinsicCostAttributes UMax(Intrinsic::umax, EltTy, {EltTy});

        Cost += thisT()->getIntrinsicInstrCost(UMax, CostKind);

        break;

      }

      case Intrinsic::experimental_vector_histogram_umin: {

        IntrinsicCostAttributes UMin(Intrinsic::umin, EltTy, {EltTy});

        Cost += thisT()->getIntrinsicInstrCost(UMin, CostKind);

        break;

      }

      }

      Cost += thisT()->getMemoryOpCost(Instruction::Store, EltTy, Alignment, 0,

                                       CostKind);

      Cost *= PtrsTy->getNumElements();

      return Cost;

    }

    case Intrinsic::get_active_lane_mask: {

      Type *ArgTy = ICA.getArgTypes()[0];

      EVT ResVT = getTLI()->getValueType(DL, RetTy, true);

      EVT ArgVT = getTLI()->getValueType(DL, ArgTy, true);


      // If we're not expanding the intrinsic then we assume this is cheap

      // to implement.

      if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgVT))

        return getTypeLegalizationCost(RetTy).first;


      // Create the expanded types that will be used to calculate the uadd_sat

      // operation.

      Type *ExpRetTy =

          VectorType::get(ArgTy, cast<VectorType>(RetTy)->getElementCount());

      IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);

      InstructionCost Cost =

          thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,

                                          CmpInst::ICMP_ULT, CostKind);

      return Cost;

    }

    case Intrinsic::experimental_memset_pattern:

      // This cost is set to match the cost of the memset_pattern16 libcall.

      // It should likely be re-evaluated after migration to this intrinsic

      // is complete.

      return TTI::TCC_Basic * 4;

    case Intrinsic::abs:

      ISD = ISD::ABS;

      break;

    case Intrinsic::fshl:

      ISD = ISD::FSHL;

      break;

    case Intrinsic::fshr:

      ISD = ISD::FSHR;

      break;

    case Intrinsic::smax:

      ISD = ISD::SMAX;

      break;

    case Intrinsic::smin:

      ISD = ISD::SMIN;

      break;

    case Intrinsic::umax:

      ISD = ISD::UMAX;

      break;

    case Intrinsic::umin:

      ISD = ISD::UMIN;

      break;

    case Intrinsic::sadd_sat:

      ISD = ISD::SADDSAT;

      break;

    case Intrinsic::ssub_sat:

      ISD = ISD::SSUBSAT;

      break;

    case Intrinsic::uadd_sat:

      ISD = ISD::UADDSAT;

      break;

    case Intrinsic::usub_sat:

      ISD = ISD::USUBSAT;

      break;

    case Intrinsic::smul_fix:

      ISD = ISD::SMULFIX;

      break;

    case Intrinsic::umul_fix:

      ISD = ISD::UMULFIX;

      break;

    case Intrinsic::sadd_with_overflow:

      ISD = ISD::SADDO;

      break;

    case Intrinsic::ssub_with_overflow:

      ISD = ISD::SSUBO;

      break;

    case Intrinsic::uadd_with_overflow:

      ISD = ISD::UADDO;

      break;

    case Intrinsic::usub_with_overflow:

      ISD = ISD::USUBO;

      break;

    case Intrinsic::smul_with_overflow:

      ISD = ISD::SMULO;

      break;

    case Intrinsic::umul_with_overflow:

      ISD = ISD::UMULO;

      break;

    case Intrinsic::fptosi_sat:

    case Intrinsic::fptoui_sat: {

      std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Tys[0]);

      std::pair<InstructionCost, MVT> RetLT = getTypeLegalizationCost(RetTy);


      // For cast instructions, types are different between source and

      // destination. Also need to check if the source type can be legalized.

      if (!SrcLT.first.isValid() || !RetLT.first.isValid())

        return InstructionCost::getInvalid();

      ISD = IID == Intrinsic::fptosi_sat ? ISD::FP_TO_SINT_SAT

                                         : ISD::FP_TO_UINT_SAT;

      break;

    }

    case Intrinsic::ctpop:

      ISD = ISD::CTPOP;

      // In case of legalization use TCC_Expensive. This is cheaper than a

      // library call but still not a cheap instruction.

      SingleCallCost = TargetTransformInfo::TCC_Expensive;

      break;

    case Intrinsic::ctlz:

      ISD = ISD::CTLZ;

      break;

    case Intrinsic::cttz:

      ISD = ISD::CTTZ;

      break;

    case Intrinsic::bswap:

      ISD = ISD::BSWAP;

      break;

    case Intrinsic::bitreverse:

      ISD = ISD::BITREVERSE;

      break;

    case Intrinsic::ucmp:

      ISD = ISD::UCMP;

      break;

    case Intrinsic::scmp:

      ISD = ISD::SCMP;

      break;

    case Intrinsic::clmul:

      ISD = ISD::CLMUL;

      break;

    case Intrinsic::masked_udiv:

    case Intrinsic::masked_sdiv:

    case Intrinsic::masked_urem:

    case Intrinsic::masked_srem: {

      unsigned UnmaskedOpc;

      switch (IID) {

      case Intrinsic::masked_udiv:

        ISD = ISD::MASKED_UDIV;

        UnmaskedOpc = Instruction::UDiv;

        break;

      case Intrinsic::masked_sdiv:

        ISD = ISD::MASKED_SDIV;

        UnmaskedOpc = Instruction::SDiv;

        break;

      case Intrinsic::masked_urem:

        ISD = ISD::MASKED_UREM;

        UnmaskedOpc = Instruction::URem;

        break;

      case Intrinsic::masked_srem:

        ISD = ISD::MASKED_SREM;

        UnmaskedOpc = Instruction::SRem;

        break;

      default:

        llvm_unreachable("Unexpected intrinsic ID");

      }

      InstructionCost Cost =

          thisT()->getArithmeticInstrCost(UnmaskedOpc, RetTy, CostKind);


      // Expansion generates a (select %mask, %rhs, 1) for the divisor.

      MVT LT = getTypeLegalizationCost(RetTy).second;

      if (!getTLI()->isOperationLegalOrCustom(ISD, LT)) {

        Type *CondTy = cast<VectorType>(RetTy)->getWithNewType(

            IntegerType::getInt1Ty(RetTy->getContext()));

        Cost += thisT()->getCmpSelInstrCost(

            BinaryOperator::Select, RetTy, CondTy, CmpInst::BAD_ICMP_PREDICATE,

            CostKind, {}, {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});

      }


      return Cost;

    }

    }


    auto *ST = dyn_cast<StructType>(RetTy);

    Type *LegalizeTy = ST ? ST->getContainedType(0) : RetTy;

    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(LegalizeTy);


    const TargetLoweringBase *TLI = getTLI();


    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {

      if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&

          TLI->isFAbsFree(LT.second)) {

        return 0;

      }


      // The operation is legal. Assume it costs 1.

      // If the type is split to multiple registers, assume that there is some

      // overhead to this.

      // TODO: Once we have extract/insert subvector cost we need to use them.

      if (LT.first > 1)

        return (LT.first * 2);

      else

        return (LT.first * 1);

    } else if (TLI->isOperationCustom(ISD, LT.second)) {

      // If the operation is custom lowered then assume

      // that the code is twice as expensive.

      return (LT.first * 2);

    }


    switch (IID) {

    case Intrinsic::fmuladd: {

      // If we can't lower fmuladd into an FMA estimate the cost as a floating

      // point mul followed by an add.


      return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,

                                             CostKind) +

             thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,

                                             CostKind);

    }

    case Intrinsic::experimental_constrained_fmuladd: {

      IntrinsicCostAttributes FMulAttrs(

        Intrinsic::experimental_constrained_fmul, RetTy, Tys);

      IntrinsicCostAttributes FAddAttrs(

        Intrinsic::experimental_constrained_fadd, RetTy, Tys);

      return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +

             thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);

    }

    case Intrinsic::smin:

    case Intrinsic::smax:

    case Intrinsic::umin:

    case Intrinsic::umax: {

      // minmax(X,Y) = select(icmp(X,Y),X,Y)

      Type *CondTy = RetTy->getWithNewBitWidth(1);

      bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;

      CmpInst::Predicate Pred =

          IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;

      InstructionCost Cost = 0;

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,

                                          Pred, CostKind);

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,

                                          Pred, CostKind);

      return Cost;

    }

    case Intrinsic::sadd_with_overflow:

    case Intrinsic::ssub_with_overflow: {

      Type *SumTy = RetTy->getContainedType(0);

      Type *OverflowTy = RetTy->getContainedType(1);

      unsigned Opcode = IID == Intrinsic::sadd_with_overflow

                            ? BinaryOperator::Add

                            : BinaryOperator::Sub;


      //   Add:

      //   Overflow -> (Result < LHS) ^ (RHS < 0)

      //   Sub:

      //   Overflow -> (Result < LHS) ^ (RHS > 0)

      InstructionCost Cost = 0;

      Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);

      Cost +=

          2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy,

                                          CmpInst::ICMP_SGT, CostKind);

      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,

                                              CostKind);

      return Cost;

    }

    case Intrinsic::uadd_with_overflow:

    case Intrinsic::usub_with_overflow: {

      Type *SumTy = RetTy->getContainedType(0);

      Type *OverflowTy = RetTy->getContainedType(1);

      unsigned Opcode = IID == Intrinsic::uadd_with_overflow

                            ? BinaryOperator::Add

                            : BinaryOperator::Sub;

      CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow

                                    ? CmpInst::ICMP_ULT

                                    : CmpInst::ICMP_UGT;


      InstructionCost Cost = 0;

      Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,

                                          OverflowTy, Pred, CostKind);

      return Cost;

    }

    case Intrinsic::smul_with_overflow:

    case Intrinsic::umul_with_overflow: {

      Type *MulTy = RetTy->getContainedType(0);

      Type *OverflowTy = RetTy->getContainedType(1);

      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;

      Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);

      bool IsSigned = IID == Intrinsic::smul_with_overflow;


      unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;

      TTI::CastContextHint CCH = TTI::CastContextHint::None;


      InstructionCost Cost = 0;

      Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);

      Cost +=

          thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);

      Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,

                                            CCH, CostKind);

      Cost += thisT()->getArithmeticInstrCost(

          Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},

          {TTI::OK_UniformConstantValue, TTI::OP_None});


      if (IsSigned)

        Cost += thisT()->getArithmeticInstrCost(

            Instruction::AShr, MulTy, CostKind,

            {TTI::OK_AnyValue, TTI::OP_None},

            {TTI::OK_UniformConstantValue, TTI::OP_None});


      Cost += thisT()->getCmpSelInstrCost(

          BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);

      return Cost;

    }

    case Intrinsic::sadd_sat:

    case Intrinsic::ssub_sat: {

      // Assume a default expansion.

      Type *CondTy = RetTy->getWithNewBitWidth(1);


      Type *OpTy = StructType::create({RetTy, CondTy});

      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat

                                     ? Intrinsic::sadd_with_overflow

                                     : Intrinsic::ssub_with_overflow;

      CmpInst::Predicate Pred = CmpInst::ICMP_SGT;


      // SatMax -> Overflow && SumDiff < 0

      // SatMin -> Overflow && SumDiff >= 0

      InstructionCost Cost = 0;

      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,

                                    nullptr, ScalarizationCostPassed);

      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,

                                          Pred, CostKind);

      Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,

                                              CondTy, Pred, CostKind);

      return Cost;

    }

    case Intrinsic::uadd_sat:

    case Intrinsic::usub_sat: {

      Type *CondTy = RetTy->getWithNewBitWidth(1);


      Type *OpTy = StructType::create({RetTy, CondTy});

      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat

                                     ? Intrinsic::uadd_with_overflow

                                     : Intrinsic::usub_with_overflow;


      InstructionCost Cost = 0;

      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,

                                    nullptr, ScalarizationCostPassed);

      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);

      Cost +=

          thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,

                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);

      return Cost;

    }

    case Intrinsic::smul_fix:

    case Intrinsic::umul_fix: {

      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;

      Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);


      unsigned ExtOp =

          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;

      TTI::CastContextHint CCH = TTI::CastContextHint::None;


      InstructionCost Cost = 0;

      Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);

      Cost +=

          thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);

      Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,

                                            CCH, CostKind);

      Cost += thisT()->getArithmeticInstrCost(

          Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},

          {TTI::OK_UniformConstantValue, TTI::OP_None});

      Cost += thisT()->getArithmeticInstrCost(

          Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},

          {TTI::OK_UniformConstantValue, TTI::OP_None});

      Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);

      return Cost;

    }

    case Intrinsic::abs: {

      // abs(X) = select(icmp(X,0),X,sub(0,X))

      Type *CondTy = RetTy->getWithNewBitWidth(1);

      CmpInst::Predicate Pred = CmpInst::ICMP_SGT;

      InstructionCost Cost = 0;

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,

                                          Pred, CostKind);

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,

                                          Pred, CostKind);

      // TODO: Should we add an OperandValueProperties::OP_Zero property?

      Cost += thisT()->getArithmeticInstrCost(

          BinaryOperator::Sub, RetTy, CostKind,

          {TTI::OK_UniformConstantValue, TTI::OP_None});

      return Cost;

    }

    case Intrinsic::fshl:

    case Intrinsic::fshr: {

      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))

      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))

      Type *CondTy = RetTy->getWithNewBitWidth(1);

      InstructionCost Cost = 0;

      Cost +=

          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);

      Cost +=

          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);

      Cost +=

          thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, CostKind);

      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,

                                              CostKind);

      // Non-constant shift amounts require a modulo. If the typesize is a

      // power-2 then this will be converted to an and, otherwise it will use a

      // urem.

      Cost += thisT()->getArithmeticInstrCost(

          isPowerOf2_32(RetTy->getScalarSizeInBits()) ? BinaryOperator::And

                                                      : BinaryOperator::URem,

          RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},

          {TTI::OK_UniformConstantValue, TTI::OP_None});

      // Shift-by-zero handling.

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,

                                          CmpInst::ICMP_EQ, CostKind);

      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,

                                          CmpInst::ICMP_EQ, CostKind);

      return Cost;

    }

    case Intrinsic::fptosi_sat:

    case Intrinsic::fptoui_sat: {

      if (Tys.empty())

        break;

      Type *FromTy = Tys[0];

      bool IsSigned = IID == Intrinsic::fptosi_sat;


      InstructionCost Cost = 0;

      IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,

                                     {FromTy, FromTy});

      Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);

      IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,

                                     {FromTy, FromTy});

      Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);

      Cost += thisT()->getCastInstrCost(

          IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,

          TTI::CastContextHint::None, CostKind);

      if (IsSigned) {

        Type *CondTy = RetTy->getWithNewBitWidth(1);

        Cost += thisT()->getCmpSelInstrCost(

            BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);

        Cost += thisT()->getCmpSelInstrCost(

            BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind);

      }

      return Cost;

    }

    case Intrinsic::ucmp:

    case Intrinsic::scmp: {

      Type *CmpTy = Tys[0];

      Type *CondTy = RetTy->getWithNewBitWidth(1);

      InstructionCost Cost =

          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,

                                      CmpIntrinsic::getGTPredicate(IID),

                                      CostKind) +

          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,

                                      CmpIntrinsic::getLTPredicate(IID),

                                      CostKind);


      EVT VT = TLI->getValueType(DL, CmpTy, true);

      if (TLI->preferSelectsOverBooleanArithmetic(VT)) {

        // x < y ? -1 : (x > y ? 1 : 0)

        Cost += 2 * thisT()->getCmpSelInstrCost(

                        BinaryOperator::Select, RetTy, CondTy,

                        ICmpInst::BAD_ICMP_PREDICATE, CostKind);

      } else {

        // zext(x > y) - zext(x < y)

        Cost +=

            2 * thisT()->getCastInstrCost(CastInst::ZExt, RetTy, CondTy,

                                          TTI::CastContextHint::None, CostKind);

        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,

                                                CostKind);

      }

      return Cost;

    }

    case Intrinsic::maximumnum:

    case Intrinsic::minimumnum: {

      // On platform that support FMAXNUM_IEEE/FMINNUM_IEEE, we expand

      // maximumnum/minimumnum to

      //    ARG0 = fcanonicalize ARG0, ARG0  // to quiet ARG0

      //    ARG1 = fcanonicalize ARG1, ARG1  // to quiet ARG1

      //    RESULT = MAXNUM_IEEE ARG0, ARG1  // or MINNUM_IEEE

      // FIXME: In LangRef, we claimed FMAXNUM has the same behaviour of

      //        FMAXNUM_IEEE, while the backend hasn't migrated the code yet.

      //        Finally, we will remove FMAXNUM_IEEE and FMINNUM_IEEE.

      int IeeeISD =

          IID == Intrinsic::maximumnum ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;

      if (TLI->isOperationLegal(IeeeISD, LT.second)) {

        IntrinsicCostAttributes FCanonicalizeAttrs(Intrinsic::canonicalize,

                                                   RetTy, Tys[0]);

        InstructionCost FCanonicalizeCost =

            thisT()->getIntrinsicInstrCost(FCanonicalizeAttrs, CostKind);

        return LT.first + FCanonicalizeCost * 2;

      }

      break;

    }

    case Intrinsic::clmul: {

      // This cost model should match the expansion in

      // TargetLowering::expandCLMUL.

      unsigned BW = RetTy->getScalarSizeInBits();

      InstructionCost AndCost =

          thisT()->getArithmeticInstrCost(Instruction::And, RetTy, CostKind);

      InstructionCost OrCost =

          thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);

      InstructionCost XorCost =

          thisT()->getArithmeticInstrCost(Instruction::Xor, RetTy, CostKind);

      InstructionCost MulCost =

          thisT()->getArithmeticInstrCost(Instruction::Mul, RetTy, CostKind);


      // When the multiplication with holes approach is used, that emits 16

      // MULs, 8 + 4 ANDs, 12 XORs and 3 ORs.

      if (BW >= 32 && BW <= 64 &&

          TLI->isOperationLegalOrCustom(ISD::MUL,

                                        TLI->getValueType(DL, RetTy))) {

        return 16 * MulCost + 12 * AndCost + 12 * XorCost + 3 * OrCost;

      }


      InstructionCost PerBitCostMul = AndCost + MulCost + XorCost;

      InstructionCost PerBitCostBittest =

          AndCost +

          thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, RetTy,

                                      ICmpInst::BAD_ICMP_PREDICATE, CostKind) +

          thisT()->getCmpSelInstrCost(Instruction::ICmp, RetTy, RetTy,

                                      ICmpInst::ICMP_NE, CostKind);

      InstructionCost PerBitCost = std::min(PerBitCostMul, PerBitCostBittest);

      return BW * PerBitCost;

    }

    default:

      break;

    }


    // Else, assume that we need to scalarize this intrinsic. For math builtins

    // this will emit a costly libcall, adding call overhead and spills. Make it

    // very expensive.

    if (isVectorizedTy(RetTy)) {

      ArrayRef<Type *> RetVTys = getContainedTypes(RetTy);


      // Scalable vectors cannot be scalarized, so return Invalid.

      if (any_of(concat<Type *const>(RetVTys, Tys),

                 [](Type *Ty) { return isa<ScalableVectorType>(Ty); }))

        return InstructionCost::getInvalid();


      InstructionCost ScalarizationCost = ScalarizationCostPassed;

      if (!SkipScalarizationCost) {

        ScalarizationCost = 0;

        for (Type *RetVTy : RetVTys) {

          ScalarizationCost += getScalarizationOverhead(

              cast<VectorType>(RetVTy), /*Insert=*/true,

              /*Extract=*/false, CostKind);

        }

      }


      unsigned ScalarCalls = getVectorizedTypeVF(RetTy).getFixedValue();

      SmallVector<Type *, 4> ScalarTys;

      for (Type *Ty : Tys) {

        if (Ty->isVectorTy())

          Ty = Ty->getScalarType();

        ScalarTys.push_back(Ty);

      }

      IntrinsicCostAttributes Attrs(IID, toScalarizedTy(RetTy), ScalarTys, FMF);

      InstructionCost ScalarCost =

          thisT()->getIntrinsicInstrCost(Attrs, CostKind);

      for (Type *Ty : Tys) {

        if (auto *VTy = dyn_cast<VectorType>(Ty)) {

          if (!ICA.skipScalarizationCost())

            ScalarizationCost += getScalarizationOverhead(

                VTy, /*Insert*/ false, /*Extract*/ true, CostKind);

          ScalarCalls = std::max(ScalarCalls,

                                 cast<FixedVectorType>(VTy)->getNumElements());

        }

      }

      return ScalarCalls * ScalarCost + ScalarizationCost;

    }


    // This is going to be turned into a library call, make it expensive.

    return SingleCallCost;

  }


  /// Get memory intrinsic cost based on arguments.

  InstructionCost
  getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
                           TTI::TargetCostKind CostKind) const override {
    const unsigned IntrinsicId = MICA.getID();
    Type *AccessTy = MICA.getDataType();
    const bool HasVariableMask = MICA.getVariableMask();
    const Align AccessAlign = MICA.getAlignment();

    // Every costed intrinsic below funnels into the common masked memory-op
    // model; the cases differ only in load vs. store, whether the mask is
    // treated as variable, and whether the access is modelled as a
    // gather/scatter.
    auto MaskedOpCost = [&](bool IsLoad, bool MaskIsVariable,
                            bool IsGatherScatter) {
      unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
      return getCommonMaskedMemoryOpCost(Opcode, AccessTy, AccessAlign,
                                         MaskIsVariable, IsGatherScatter,
                                         CostKind);
    };

    switch (IntrinsicId) {
    // Load-like operations modelled as a gather.
    //
    // Strided accesses: for a target without strided memory operations (or
    // for an illegal operation type on one which does), assume we lower to a
    // gather/scatter operation.  (Which may in turn be scalarized.)
    //
    // Expand load / compress store are treated as gather/scatter as well.
    // TODO: implement more precise cost estimation for these intrinsics.
    case Intrinsic::experimental_vp_strided_load:
    case Intrinsic::masked_gather:
    case Intrinsic::vp_gather:
    case Intrinsic::masked_expandload:
      return MaskedOpCost(/*IsLoad=*/true, HasVariableMask,
                          /*IsGatherScatter=*/true);

    // Store-like counterparts, modelled as a scatter.
    case Intrinsic::experimental_vp_strided_store:
    case Intrinsic::masked_scatter:
    case Intrinsic::vp_scatter:
    case Intrinsic::masked_compressstore:
      return MaskedOpCost(/*IsLoad=*/false, HasVariableMask,
                          /*IsGatherScatter=*/true);

    // Plain masked load/store: the mask is always costed as variable and the
    // access is not a gather/scatter.
    // TODO: Pass on AddressSpace when we have test coverage.
    case Intrinsic::masked_load:
      return MaskedOpCost(/*IsLoad=*/true, /*MaskIsVariable=*/true,
                          /*IsGatherScatter=*/false);
    case Intrinsic::masked_store:
      return MaskedOpCost(/*IsLoad=*/false, /*MaskIsVariable=*/true,
                          /*IsGatherScatter=*/false);

    // No generic estimate is provided for these.
    case Intrinsic::vp_load:
    case Intrinsic::vp_store:
    case Intrinsic::vp_load_ff:
      return InstructionCost::getInvalid();

    default:
      llvm_unreachable("unexpected intrinsic");
    }
  }


  /// Compute a cost of the given call instruction.

  ///

  /// Compute the cost of calling function F with return type RetTy and

  /// argument types Tys. F might be nullptr, in this case the cost of an

  /// arbitrary call with the specified signature will be returned.

  /// This is used, for instance,  when we estimate call of a vector

  /// counterpart of the given function.

  /// \param F Called function, might be nullptr.

  /// \param RetTy Return value types.

  /// \param Tys Argument types.

  /// \returns The cost of Call instruction.

  InstructionCost
  getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
                   TTI::TargetCostKind CostKind) const override {
    // Flat estimate: none of the arguments are consulted, every call is
    // charged the same fixed amount.
    const InstructionCost FlatCallCost = 10;
    return FlatCallCost;
  }


  /// Estimate how many parts \p Tp occupies once it has been legalized.
  ///
  /// \param Tp Type to query; it is passed to getTypeLegalizationCost and
  ///        must therefore be non-null.
  /// \returns 0 when the legalization cost is invalid. For a fixed vector
  ///          with a non-power-of-2 element count whose legalized type is a
  ///          fixed vector of the same element type, returns
  ///          ceil(num-elements / legalized-num-elements). Otherwise returns
  ///          the legalization cost value.
  unsigned getNumberOfParts(Type *Tp) const override {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
    if (!LT.first.isValid())
      return 0;
    // Try to find actual number of parts for non-power-of-2 elements as
    // ceil(num-of-elements/num-of-subtype-elements).
    //
    // The guard must test FTp (the result of the cast), not Tp: Tp has
    // already been dereferenced above, whereas FTp is null whenever Tp is not
    // a fixed vector and is dereferenced in the rest of the condition.
    if (auto *FTp = dyn_cast<FixedVectorType>(Tp);
        FTp && LT.second.isFixedLengthVector() &&
        !has_single_bit(FTp->getNumElements())) {
      if (auto *SubTp = dyn_cast_if_present<FixedVectorType>(
              EVT(LT.second).getTypeForEVT(Tp->getContext()));
          SubTp && SubTp->getElementType() == FTp->getElementType())
        return divideCeil(FTp->getNumElements(), SubTp->getNumElements());
    }
    return LT.first.getValue();
  }


  InstructionCost
  getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *,
                            TTI::TargetCostKind) const override {
    // Default model: address computation is charged nothing; no argument is
    // inspected.
    const InstructionCost FreeCost = 0;
    return FreeCost;
  }


  /// Try to calculate arithmetic and shuffle op costs for reduction intrinsics.

  /// We're assuming that reduction operations are performed as follows:

  ///

  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,

  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>

  ///            \----------------v-------------/  \----------v------------/

  ///                            n/2 elements               n/2 elements

  /// %red1 = op <n x t> %val, <n x t> val1

  /// After this operation we have a vector %red1 where only the first n/2

  /// elements are meaningful, the second n/2 elements are undefined and can be

  /// dropped. All other operations are actually working with the vector of

  /// length n/2, not n, though the real vector length is still n.

  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,

  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>

  ///            \----------------v-------------/  \----------v------------/

  ///                            n/4 elements               3*n/4 elements

  /// %red2 = op <n x t> %red1, <n x t> val2  - working with the vector of

  /// length n/2, the resulting vector has length n/4 etc.

  ///

  /// The cost model should take into account that the actual length of the

  /// vector is reduced on each iteration.


  InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty,
                                       TTI::TargetCostKind CostKind) const {
    // Targets must implement a default value for the scalable case, since
    // we don't know how many lanes the vector has.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    Type *EltTy = Ty->getElementType();
    unsigned LanesLeft = cast<FixedVectorType>(Ty)->getNumElements();

    const bool IsOrAnd =
        Opcode == Instruction::Or || Opcode == Instruction::And;
    if (IsOrAnd && EltTy == IntegerType::getInt1Ty(Ty->getContext()) &&
        LanesLeft >= 2) {
      // Or reduction for i1 is represented as:
      // %val = bitcast <ReduxWidth x i1> to iReduxWidth
      // %res = cmp ne iReduxWidth %val, 0
      // And reduction for i1 is represented as:
      // %val = bitcast <ReduxWidth x i1> to iReduxWidth
      // %res = cmp eq iReduxWidth %val, 11111
      Type *MaskIntTy = IntegerType::get(Ty->getContext(), LanesLeft);
      InstructionCost BitcastCost = thisT()->getCastInstrCost(
          Instruction::BitCast, MaskIntTy, Ty, TTI::CastContextHint::None,
          CostKind);
      InstructionCost CompareCost = thisT()->getCmpSelInstrCost(
          Instruction::ICmp, MaskIntTy, CmpInst::makeCmpResultType(MaskIntTy),
          CmpInst::BAD_ICMP_PREDICATE, CostKind);
      return BitcastCost + CompareCost;
    }

    // Number of halving steps still to be charged.
    unsigned LevelsLeft = Log2_32(LanesLeft);
    std::pair<InstructionCost, MVT> Legal =
        thisT()->getTypeLegalizationCost(Ty);
    const unsigned LegalLanes =
        Legal.second.isVector() ? Legal.second.getVectorNumElements() : 1;

    InstructionCost ShuffleTotal = 0;
    InstructionCost ArithTotal = 0;
    VectorType *CurTy = Ty;

    // While the vector is wider than the legalized type, each level extracts
    // a half-width subvector and combines at that narrower width. Every such
    // level is removed from the count handled after the loop.
    for (; LanesLeft > LegalLanes; --LevelsLeft) {
      LanesLeft /= 2;
      VectorType *HalfTy = FixedVectorType::get(EltTy, LanesLeft);
      ShuffleTotal +=
          thisT()->getShuffleCost(TTI::SK_ExtractSubvector, HalfTy, CurTy, {},
                                  CostKind, LanesLeft, HalfTy);
      ArithTotal += thisT()->getArithmeticInstrCost(Opcode, HalfTy, CostKind);
      CurTy = HalfTy;
    }

    // The minimal length of the vector is limited by the real length of vector
    // operations performed on the current platform. That's why several final
    // reduction operations are performed on the vectors with the same
    // architecture-dependent length.

    // By default reductions need one shuffle per reduction level.
    InstructionCost ShufflePerLevel = thisT()->getShuffleCost(
        TTI::SK_PermuteSingleSrc, CurTy, CurTy, {}, CostKind, 0, CurTy);
    ShuffleTotal += LevelsLeft * ShufflePerLevel;

    InstructionCost ArithPerLevel =
        thisT()->getArithmeticInstrCost(Opcode, CurTy, CostKind);
    ArithTotal += LevelsLeft * ArithPerLevel;

    // A final extractelement is charged on top of the reduction levels.
    InstructionCost ExtractCost = thisT()->getVectorInstrCost(
        Instruction::ExtractElement, CurTy, CostKind, 0, nullptr, nullptr);
    return ShuffleTotal + ArithTotal + ExtractCost;
  }


  /// Try to calculate the cost of performing strict (in-order) reductions,

  /// which involves doing a sequence of floating point additions in lane

  /// order, starting with an initial value. For example, consider a scalar

  /// initial value 'InitVal' of type float and a vector of type <4 x float>:

  ///

  ///   Vector = <float %v0, float %v1, float %v2, float %v3>

  ///

  ///   %add1 = %InitVal + %v0

  ///   %add2 = %add1 + %v1

  ///   %add3 = %add2 + %v2

  ///   %add4 = %add3 + %v3

  ///

  /// As a simple estimate we can say the cost of such a reduction is 4 times

  /// the cost of a scalar FP addition. We can only estimate the costs for

  /// fixed-width vectors here because for scalable vectors we do not know the

  /// runtime number of operations.


  InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty,
                                          TTI::TargetCostKind CostKind) const {
    // Targets must implement a default value for the scalable case, since
    // we don't know how many lanes the vector has.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    auto *FixedTy = cast<FixedVectorType>(Ty);

    // Cost of extracting every lane from the vector.
    InstructionCost LaneExtractCost = getScalarizationOverhead(
        FixedTy, /*Insert=*/false, /*Extract=*/true, CostKind);

    // One scalar operation per lane.
    InstructionCost SerialOpsCost = thisT()->getArithmeticInstrCost(
        Opcode, FixedTy->getElementType(), CostKind);
    SerialOpsCost *= FixedTy->getNumElements();

    return LaneExtractCost + SerialOpsCost;
  }


  /// Cost of an arithmetic reduction of \p Ty with \p Opcode.
  ///
  /// Dispatches to the ordered (in-order) model when
  /// TTI::requiresOrderedReduction(FMF) holds and to the tree model otherwise.
  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override {
    assert(Ty && "Unknown reduction vector type");
    return TTI::requiresOrderedReduction(FMF)
               ? getOrderedReductionCost(Opcode, Ty, CostKind)
               : getTreeReductionCost(Opcode, Ty, CostKind);
  }


  /// Try to calculate op costs for min/max reduction operations.
  /// \param IID Min/max intrinsic whose cost is charged at each reduction
  ///        level.
  /// \param Ty Vector type being reduced.

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override {
    // Targets must implement a default value for the scalable case, since
    // we don't know how many lanes the vector has.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    Type *EltTy = Ty->getElementType();
    unsigned LanesLeft = cast<FixedVectorType>(Ty)->getNumElements();

    // Number of halving steps still to be charged.
    unsigned LevelsLeft = Log2_32(LanesLeft);
    std::pair<InstructionCost, MVT> Legal =
        thisT()->getTypeLegalizationCost(Ty);
    const unsigned LegalLanes =
        Legal.second.isVector() ? Legal.second.getVectorNumElements() : 1;

    InstructionCost ShuffleTotal = 0;
    InstructionCost MinMaxTotal = 0;
    VectorType *CurTy = Ty;

    // While the vector is wider than the legalized type, each level extracts
    // a half-width subvector and applies the min/max intrinsic at that
    // narrower width. Every such level is removed from the count handled
    // after the loop.
    for (; LanesLeft > LegalLanes; --LevelsLeft) {
      LanesLeft /= 2;
      auto *HalfTy = FixedVectorType::get(EltTy, LanesLeft);

      ShuffleTotal +=
          thisT()->getShuffleCost(TTI::SK_ExtractSubvector, HalfTy, CurTy, {},
                                  CostKind, LanesLeft, HalfTy);

      IntrinsicCostAttributes HalfAttrs(IID, HalfTy, {HalfTy, HalfTy}, FMF);
      MinMaxTotal += getIntrinsicInstrCost(HalfAttrs, CostKind);
      CurTy = HalfTy;
    }

    // The minimal length of the vector is limited by the real length of vector
    // operations performed on the current platform. That's why several final
    // reduction operations are performed on the vectors with the same
    // architecture-dependent length.
    InstructionCost ShufflePerLevel = thisT()->getShuffleCost(
        TTI::SK_PermuteSingleSrc, CurTy, CurTy, {}, CostKind, 0, CurTy);
    ShuffleTotal += LevelsLeft * ShufflePerLevel;

    IntrinsicCostAttributes LegalAttrs(IID, CurTy, {CurTy, CurTy}, FMF);
    MinMaxTotal += LevelsLeft * getIntrinsicInstrCost(LegalAttrs, CostKind);

    // The last min/max should be in vector registers and we counted it above.
    // So just need a single extractelement.
    InstructionCost ExtractCost = thisT()->getVectorInstrCost(
        Instruction::ExtractElement, CurTy, CostKind, 0, nullptr, nullptr);
    return ShuffleTotal + MinMaxTotal + ExtractCost;
  }


  /// Cost of reducing an extended vector, i.e. vecreduce.opcode(ext(Ty A)),
  /// where the extension is a zext when \p IsUnsigned is set and a sext
  /// otherwise, and \p ResTy is the extended element type.
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *Ty, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const override {
    auto *FixedTy = dyn_cast<FixedVectorType>(Ty);
    const bool IsUnsignedI1Add =
        FixedTy && IsUnsigned && Opcode == Instruction::Add &&
        FixedTy->getElementType() == IntegerType::getInt1Ty(Ty->getContext());

    if (IsUnsignedI1Add) {
      // Represent vector_reduce_add(ZExt(<n x i1>)) as
      // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
      auto *MaskIntTy =
          IntegerType::get(ResTy->getContext(), FixedTy->getNumElements());
      IntrinsicCostAttributes PopCountAttrs(Intrinsic::ctpop, MaskIntTy,
                                            {MaskIntTy},
                                            FMF.value_or(FastMathFlags()));
      InstructionCost BitcastCost = thisT()->getCastInstrCost(
          Instruction::BitCast, MaskIntTy, FixedTy, TTI::CastContextHint::None,
          CostKind);
      InstructionCost PopCountCost =
          thisT()->getIntrinsicInstrCost(PopCountAttrs, CostKind);
      return BitcastCost + PopCountCost;
    }

    // Without any native support, this is equivalent to the cost of
    // vecreduce.opcode(ext(Ty A)).
    VectorType *WideTy = VectorType::get(ResTy, Ty);
    InstructionCost ReduceCost =
        thisT()->getArithmeticReductionCost(Opcode, WideTy, FMF, CostKind);

    const unsigned ExtOpcode =
        IsUnsigned ? Instruction::ZExt : Instruction::SExt;
    InstructionCost ExtendCost = thisT()->getCastInstrCost(
        ExtOpcode, WideTy, Ty, TTI::CastContextHint::None, CostKind);

    return ReduceCost + ExtendCost;
  }


  /// Cost of a multiply-accumulate reduction, modelled as a reduction over
  /// the product of two extended operands.
  ///
  /// \param IsUnsigned Selects zext (true) or sext (false) for the operands.
  /// \param RedOpcode Reduction opcode; asserted to be Add or Sub.
  /// \param ResTy Element type the operands are extended to.
  /// \param Ty Vector type of the unextended operands.
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy,
                         VectorType *Ty,
                         TTI::TargetCostKind CostKind) const override {
    // Without any native support, this is equivalent to the cost of
    // vecreduce.add(mul(ext(Ty A), ext(Ty B))) or
    // vecreduce.add(mul(A, B)).
    assert((RedOpcode == Instruction::Add || RedOpcode == Instruction::Sub) &&
           "The reduction opcode is expected to be Add or Sub.");

    VectorType *WideTy = VectorType::get(ResTy, Ty);

    InstructionCost ReduceCost = thisT()->getArithmeticReductionCost(
        RedOpcode, WideTy, std::nullopt, CostKind);

    const unsigned ExtOpcode =
        IsUnsigned ? Instruction::ZExt : Instruction::SExt;
    InstructionCost OneExtendCost = thisT()->getCastInstrCost(
        ExtOpcode, WideTy, Ty, TTI::CastContextHint::None, CostKind);

    InstructionCost MultiplyCost =
        thisT()->getArithmeticInstrCost(Instruction::Mul, WideTy, CostKind);

    // Both multiplicands are extended, so the extension is charged twice.
    InstructionCost BothExtendsCost = 2 * OneExtendCost;
    return ReduceCost + MultiplyCost + BothExtendsCost;
  }


  /// Cost of a partial reduction that folds \p VF input lanes into
  /// VF / Ratio accumulator lanes, where Ratio is the accumulator element
  /// width divided by the width of \p InputTypeA.
  ///
  /// \param Opcode Opcode of the reducing operation.
  /// \param InputTypeA Element type of the first input.
  /// \param InputTypeB Element type of the second input; it is only compared
  ///        against \p InputTypeA, and only when \p BinOp is set.
  /// \param AccumType Element type of the accumulator.
  /// \param VF Number of input lanes.
  /// \param OpAExtend Extension applied to the first input (PR_None for none).
  /// \param OpBExtend Extension applied to the second input (PR_None for
  ///        none); only consulted when \p BinOp is set.
  /// \param BinOp Optional opcode combining the two extended inputs before
  ///        the reduction.
  /// \param FMF Not consulted by this default implementation.
  /// \returns An invalid cost when either element size is zero, when the
  ///          accumulator width is not a whole multiple of the input width,
  ///          when \p VF is not a multiple of Ratio that is larger than
  ///          Ratio, or when \p BinOp is set and the input types differ.
  InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind,
      std::optional<FastMathFlags> FMF) const override {
    unsigned EltSizeAcc = AccumType->getScalarSizeInBits();
    unsigned EltSizeInA = InputTypeA->getScalarSizeInBits();

    // Validate the sizes before dividing: a zero input size would divide by
    // zero, and an accumulator narrower than the input would yield a zero
    // Ratio that is then used as a modulus below.
    if (EltSizeInA == 0 || EltSizeAcc == 0 || EltSizeAcc % EltSizeInA != 0)
      return InstructionCost::getInvalid();

    // At least 1 here, since EltSizeAcc is a non-zero multiple of EltSizeInA.
    unsigned Ratio = EltSizeAcc / EltSizeInA;
    if (VF.getKnownMinValue() <= Ratio || VF.getKnownMinValue() % Ratio != 0 ||
        (BinOp && InputTypeA != InputTypeB))
      return InstructionCost::getInvalid();

    Type *InputVectorType = VectorType::get(InputTypeA, VF);
    Type *ExtInputVectorType = VectorType::get(AccumType, VF);
    Type *AccumVectorType =
        VectorType::get(AccumType, VF.divideCoefficientBy(Ratio));

    InstructionCost ExtendCostA = 0;
    if (OpAExtend != TTI::PartialReductionExtendKind::PR_None)
      ExtendCostA = getCastInstrCost(
          TTI::getOpcodeForPartialReductionExtendKind(OpAExtend),
          ExtInputVectorType, InputVectorType, TTI::CastContextHint::None,
          CostKind);

    // TODO: add cost of extracting subvectors from the source vector that
    // is to be partially reduced.
    InstructionCost ReductionOpCost =
        Ratio * getArithmeticInstrCost(Opcode, AccumVectorType, CostKind);

    // Without a binary op only the first input is extended and reduced.
    if (!BinOp)
      return ExtendCostA + ReductionOpCost;

    InstructionCost ExtendCostB = 0;
    if (OpBExtend != TTI::PartialReductionExtendKind::PR_None)
      ExtendCostB = getCastInstrCost(
          TTI::getOpcodeForPartialReductionExtendKind(OpBExtend),
          ExtInputVectorType, InputVectorType, TTI::CastContextHint::None,
          CostKind);
    return ExtendCostA + ExtendCostB + ReductionOpCost +
           getArithmeticInstrCost(*BinOp, ExtInputVectorType, CostKind);
  }


  /// Cost charged for a vector split; a fixed value of 1 in this default
  /// implementation.
  InstructionCost getVectorSplitCost() const {
    const InstructionCost UnitCost = 1;
    return UnitCost;
  }


  /// @}

};


/// Concrete BasicTTIImpl that can be used if no further customization

/// is needed.


class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {

  // Shorthand for the CRTP base this class instantiates with itself.
  using BaseT = BasicTTIImplBase<BasicTTIImpl>;


  // The base is a friend so that it can reach the private accessors below.
  friend class BasicTTIImplBase<BasicTTIImpl>;


  // Subtarget and lowering information handed out by getST() / getTLI().
  // They are not initialized in this header; only the constructor is
  // declared here.
  const TargetSubtargetInfo *ST;

  const TargetLoweringBase *TLI;


  // Private accessors returning the stored pointers.
  const TargetSubtargetInfo *getST() const { return ST; }

  const TargetLoweringBase *getTLI() const { return TLI; }


public:

  /// Construct the implementation for function \p F on target machine \p TM.
  /// Only declared here; the definition is not part of this header.
  LLVM_ABI explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);

};


} // end namespace llvm


#endif // LLVM_CODEGEN_BASICTTIIMPL_H

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

Alignment.h

X
#define X(NUM, ENUM, NAME)
Definition ELF.h:856

BitVector.h
This file implements the BitVector class.

Casting.h

CommandLine.h

LLVM_ABI
#define LLVM_ABI
Definition Compiler.h:215

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

CostKind
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

IntrinsicCostStrategy::InstructionCost
@ InstructionCost
Definition CostModel.cpp:51

DataLayout.h

DerivedTypes.h

BasicBlock.h

Constant.h

Instruction.h

Operator.h

Type.h

Value.h

ISDOpcodes.h

InstrTypes.h

Instructions.h

Intrinsics.h

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3391

LoopInfo.h

LoopUtils.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MachineValueType.h

MathExtras.h

getCalledFunction
static const Function * getCalledFunction(const Value *V)
Definition MemoryBuiltins.cpp:157

T
#define T
Definition Mips16ISelLowering.cpp:282

Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

OptimizationRemarkEmitter.h

P
#define P(N)

getNumElements
static unsigned getNumElements(Type *Ty)
Definition SLPVectorizer.cpp:342

getValueType
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
Definition SLPVectorizer.cpp:327

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

SmallPtrSet.h
This file defines the SmallPtrSet class.

SmallVector.h
This file defines the SmallVector class.

Y
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39

TargetLibraryInfo.h

TargetLowering.h
This file describes how to lower LLVM code to machine code.

TargetOptions.h

TargetSubtargetInfo.h

TargetTransformInfoImpl.h
This file provides helpers for the implementation of a TargetTransformInfo-conforming class.

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

ValueTracking.h

ValueTypes.h

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235

llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353

llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208

llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511

llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137

llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201

llvm::AllocaInst
an instruction to allocate memory on the stack
Definition Instructions.h:65

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194

llvm::ArrayRef::size
size_t size() const
Get the array size.
Definition ArrayRef.h:141

llvm::ArrayRef::drop_back
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
Definition ArrayRef.h:200

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition AssumptionCache.h:44

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicTTIImplBase::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const override
Definition BasicTTIImpl.h:687

llvm::BasicTTIImplBase::preferToKeepConstantsAttached
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const override
Definition BasicTTIImpl.h:697

llvm::BasicTTIImplBase::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
Definition BasicTTIImpl.h:1614

llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition BasicTTIImpl.h:1064

llvm::BasicTTIImplBase::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
Definition BasicTTIImpl.h:3417

llvm::BasicTTIImplBase::isIndexedLoadLegal
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty) const override
Definition BasicTTIImpl.h:520

llvm::BasicTTIImplBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
Definition BasicTTIImpl.h:582

llvm::BasicTTIImplBase::getCallerAllocaCost
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override
Definition BasicTTIImpl.h:720

llvm::BasicTTIImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1419

llvm::BasicTTIImplBase::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
Definition BasicTTIImpl.h:899

llvm::BasicTTIImplBase::shouldBuildLookupTables
bool shouldBuildLookupTables() const override
Definition BasicTTIImpl.h:647

llvm::BasicTTIImplBase::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override
Definition BasicTTIImpl.h:445

llvm::BasicTTIImplBase::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const override
Definition BasicTTIImpl.h:566

llvm::BasicTTIImplBase::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const override
Definition BasicTTIImpl.h:3267

llvm::BasicTTIImplBase::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const override
Definition BasicTTIImpl.h:873

llvm::BasicTTIImplBase::useAA
bool useAA() const override
Definition BasicTTIImpl.h:570

llvm::BasicTTIImplBase::getPrefetchDistance
unsigned getPrefetchDistance() const override
Definition BasicTTIImpl.h:869

llvm::BasicTTIImplBase::improveShuffleKindFromMask
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
Definition BasicTTIImpl.h:1142

llvm::BasicTTIImplBase::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Estimate the overhead of scalarizing an instruction's operands.
Definition BasicTTIImpl.h:982

llvm::BasicTTIImplBase::isLegalAddScalableImmediate
bool isLegalAddScalableImmediate(int64_t Imm) const override
Definition BasicTTIImpl.h:472

llvm::BasicTTIImplBase::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const override
Definition BasicTTIImpl.h:449

llvm::BasicTTIImplBase::simplifyDemandedUseBitsIntrinsic
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const override
Definition BasicTTIImpl.h:831

llvm::BasicTTIImplBase::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
Definition BasicTTIImpl.h:480

llvm::BasicTTIImplBase::addrspacesMayAlias
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override
Definition BasicTTIImpl.h:431

llvm::BasicTTIImplBase::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
Definition BasicTTIImpl.h:396

llvm::BasicTTIImplBase::isIndexedStoreLegal
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty) const override
Definition BasicTTIImpl.h:525

llvm::BasicTTIImplBase::haveFastSqrt
bool haveFastSqrt(Type *Ty) const override
Definition BasicTTIImpl.h:678

llvm::BasicTTIImplBase::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
Definition BasicTTIImpl.h:440

llvm::BasicTTIImplBase::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition BasicTTIImpl.h:1198

llvm::BasicTTIImplBase::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const override
Definition BasicTTIImpl.h:588

llvm::BasicTTIImplBase::getStoreMinimumVF
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy, Align Alignment, unsigned AddrSpace) const override
Definition BasicTTIImpl.h:497

llvm::BasicTTIImplBase::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
Definition BasicTTIImpl.h:463

llvm::BasicTTIImplBase::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const override
Definition BasicTTIImpl.h:717

llvm::BasicTTIImplBase::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const override
Definition BasicTTIImpl.h:716

llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Estimate the overhead of scalarizing an instruction.
Definition BasicTTIImpl.h:912

llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition BasicTTIImpl.h:1488

llvm::BasicTTIImplBase::getPreferredLargeGEPBaseOffset
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset)
Definition BasicTTIImpl.h:493

llvm::BasicTTIImplBase::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const override
Definition BasicTTIImpl.h:653

llvm::BasicTTIImplBase::isTargetIntrinsicWithStructReturnOverloadAtField
bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const override
Definition BasicTTIImpl.h:959

llvm::BasicTTIImplBase::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
Definition BasicTTIImpl.h:3405

llvm::BasicTTIImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1424

llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition BasicTTIImpl.h:1499

llvm::BasicTTIImplBase::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Definition BasicTTIImpl.h:547

llvm::BasicTTIImplBase::getEpilogueVectorizationMinVF
unsigned getEpilogueVectorizationMinVF() const override
Definition BasicTTIImpl.h:813

llvm::BasicTTIImplBase::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
Definition BasicTTIImpl.h:1409

llvm::BasicTTIImplBase::getVectorSplitCost
InstructionCost getVectorSplitCost() const
Definition BasicTTIImpl.h:3558

llvm::BasicTTIImplBase::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Definition BasicTTIImpl.h:562

llvm::BasicTTIImplBase::getMaxVScale
std::optional< unsigned > getMaxVScale() const override
Definition BasicTTIImpl.h:903

llvm::BasicTTIImplBase::getFlatAddressSpace
unsigned getFlatAddressSpace() const override
Definition BasicTTIImpl.h:435

llvm::BasicTTIImplBase::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
Compute a cost of the given call instruction.
Definition BasicTTIImpl.h:3262

llvm::BasicTTIImplBase::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition BasicTTIImpl.h:727

llvm::BasicTTIImplBase::getTreeReductionCost
InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind) const
Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
Definition BasicTTIImpl.h:3311

llvm::BasicTTIImplBase::~BasicTTIImplBase
~BasicTTIImplBase() override=default

llvm::BasicTTIImplBase::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const override
Definition BasicTTIImpl.h:459

llvm::BasicTTIImplBase::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const override
Definition BasicTTIImpl.h:881

llvm::BasicTTIImplBase::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(ElementCount VF, bool HasUnorderedReductions) const override
Definition BasicTTIImpl.h:1059

llvm::BasicTTIImplBase::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition BasicTTIImpl.h:799

llvm::BasicTTIImplBase::getTypeBasedIntrinsicInstrCost
InstructionCost getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Get intrinsic cost based on argument types.
Definition BasicTTIImpl.h:2288

llvm::BasicTTIImplBase::hasBranchDivergence
bool hasBranchDivergence(const Function *F=nullptr) const override
Definition BasicTTIImpl.h:423

llvm::BasicTTIImplBase::getOrderedReductionCost
InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind) const
Try to calculate the cost of performing strict (in-order) reductions, which involves doing a sequence of floating point additions in lane order, starting with an initial value.
Definition BasicTTIImpl.h:3387

llvm::BasicTTIImplBase::getCacheAssociativity
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
Definition BasicTTIImpl.h:855

llvm::BasicTTIImplBase::shouldPrefetchAddressSpace
bool shouldPrefetchAddressSpace(unsigned AS) const override
Definition BasicTTIImpl.h:889

llvm::BasicTTIImplBase::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const override
Definition BasicTTIImpl.h:388

llvm::BasicTTIImplBase::getCacheLineSize
unsigned getCacheLineSize() const override
Definition BasicTTIImpl.h:865

llvm::BasicTTIImplBase::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition BasicTTIImpl.h:826

llvm::BasicTTIImplBase::shouldDropLSRSolutionIfLessProfitable
bool shouldDropLSRSolutionIfLessProfitable() const override
Definition BasicTTIImpl.h:539

llvm::BasicTTIImplBase::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const override
Definition BasicTTIImpl.h:725

llvm::BasicTTIImplBase::getMulAccReductionCost
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
Definition BasicTTIImpl.h:3494

llvm::BasicTTIImplBase::getIndexedVectorInstrCostFromEnd
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
Definition BasicTTIImpl.h:1517

llvm::BasicTTIImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1227

llvm::BasicTTIImplBase::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition BasicTTIImpl.h:1025

llvm::BasicTTIImplBase::getPartialReductionCost
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
Definition BasicTTIImpl.h:3515

llvm::BasicTTIImplBase::isLegalAddImmediate
bool isLegalAddImmediate(int64_t imm) const override
Definition BasicTTIImpl.h:468

llvm::BasicTTIImplBase::getReplicationShuffleCost
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const override
Definition BasicTTIImpl.h:1531

llvm::BasicTTIImplBase::isSingleThreaded
bool isSingleThreaded() const override
Definition BasicTTIImpl.h:453

llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition BasicTTIImpl.h:1477

llvm::BasicTTIImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const override
Definition BasicTTIImpl.h:543

llvm::BasicTTIImplBase::isValidAddrSpaceCast
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override
Definition BasicTTIImpl.h:427

llvm::BasicTTIImplBase::isTargetIntrinsicWithOverloadTypeAtArg
bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const override
Definition BasicTTIImpl.h:953

llvm::BasicTTIImplBase::isTargetIntrinsicWithScalarOpAtArg
bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const override
Definition BasicTTIImpl.h:948

llvm::BasicTTIImplBase::getVScaleForTuning
std::optional< unsigned > getVScaleForTuning() const override
Definition BasicTTIImpl.h:904

llvm::BasicTTIImplBase::getExtendedReductionCost
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
Definition BasicTTIImpl.h:3465

llvm::BasicTTIImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
Definition BasicTTIImpl.h:1762

llvm::BasicTTIImplBase::preferTailFoldingOverEpilogue
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
Definition BasicTTIImpl.h:817

llvm::BasicTTIImplBase::simplifyDemandedVectorEltsIntrinsic
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
Definition BasicTTIImpl.h:838

llvm::BasicTTIImplBase::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
Definition BasicTTIImpl.h:3285

llvm::BasicTTIImplBase::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const override
Definition BasicTTIImpl.h:685

llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *RetTy, ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing the inputs and outputs of an instruction, with return type RetTy...
Definition BasicTTIImpl.h:1006

llvm::BasicTTIImplBase::getPreferredTailFoldingStyle
TailFoldingStyle getPreferredTailFoldingStyle() const override
Definition BasicTTIImpl.h:821

llvm::BasicTTIImplBase::getCacheSize
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
Definition BasicTTIImpl.h:849

llvm::BasicTTIImplBase::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t imm) const override
Definition BasicTTIImpl.h:476

llvm::BasicTTIImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const override
Definition BasicTTIImpl.h:807

llvm::BasicTTIImplBase::getRegUsageForType
unsigned getRegUsageForType(Type *Ty) const override
Definition BasicTTIImpl.h:577

llvm::BasicTTIImplBase::DL
const DataLayout & DL
Definition TargetTransformInfoImpl.h:40

llvm::BasicTTIImplBase::getMemIntrinsicInstrCost
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Get memory intrinsic cost based on arguments.
Definition BasicTTIImpl.h:3190

llvm::BasicTTIImplBase::BasicTTIImplBase
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition BasicTTIImpl.h:378

llvm::BasicTTIImplBase::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1563

llvm::BasicTTIImplBase::isTypeLegal
bool isTypeLegal(Type *Ty) const override
Definition BasicTTIImpl.h:572

llvm::BasicTTIImplBase::enableWritePrefetching
bool enableWritePrefetching() const override
Definition BasicTTIImpl.h:885

llvm::BasicTTIImplBase::isLSRCostLess
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const override
Definition BasicTTIImpl.h:530

llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
Definition BasicTTIImpl.h:965

llvm::BasicTTIImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const override
Definition BasicTTIImpl.h:535

llvm::BasicTTIImpl::BasicTTIImpl
LLVM_ABI BasicTTIImpl(const TargetMachine *TM, const Function &F)
Definition BasicTargetTransformInfo.cpp:32

llvm::BitVector
Definition BitVector.h:101

llvm::BitVector::count
size_type count() const
Returns the number of bits which are set.
Definition BitVector.h:181

llvm::BitVector::set
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366

llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
Definition BlockFrequencyInfo.h:38

llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Definition InstrTypes.h:1181

llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:1049

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740

llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition InstrTypes.h:773

llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770

llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763

llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:761

llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition InstrTypes.h:762

llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:750

llvm::CmpIntrinsic::getGTPredicate
static CmpInst::Predicate getGTPredicate(Intrinsic::ID ID)
Definition IntrinsicInst.h:925

llvm::CmpIntrinsic::getLTPredicate
static CmpInst::Predicate getLTPredicate(Intrinsic::ID ID)
Definition IntrinsicInst.h:932

llvm::ConstantRange
This class represents a range of values.
Definition ConstantRange.h:48

llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64

llvm::DenseMap
Definition DenseMap.h:834

llvm::ElementCount
Definition TypeSize.h:298

llvm::ElementCount::isVector
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324

llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309

llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23

llvm::FeatureBitset
Container class for subtarget features.
Definition SubtargetFeature.h:43

llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition DerivedTypes.h:650

llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition DerivedTypes.h:693

llvm::FixedVectorType::get
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:867

llvm::Function
Definition Function.h:65

llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:328

llvm::GlobalValue
Definition GlobalValue.h:49

llvm::InstCombiner
The core instruction combiner logic.
Definition InstCombiner.h:49

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition InstructionCost.h:82

llvm::Instruction
Definition Instruction.h:70

llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition Instruction.h:344

llvm::IntegerType::get
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:348

llvm::IntrinsicCostAttributes
Definition TargetTransformInfo.h:178

llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition TargetTransformInfo.h:212

llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition TargetTransformInfo.h:215

llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition TargetTransformInfo.h:211

llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition TargetTransformInfo.h:221

llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition TargetTransformInfo.h:214

llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition TargetTransformInfo.h:213

llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition TargetTransformInfo.h:210

llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition TargetTransformInfo.h:209

llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition TargetTransformInfo.h:217

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition IntrinsicInst.h:49

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition MCSubtargetInfo.h:141

llvm::MVT
Machine Value Type.
Definition MachineValueType.h:36

llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition MachineValueType.h:391

llvm::MachineMemOperand::MONone
@ MONone
Definition MachineMemOperand.h:135

llvm::MemIntrinsicCostAttributes
Information for memory intrinsic cost model.
Definition TargetTransformInfo.h:128

llvm::MemIntrinsicCostAttributes::getAlignment
Align getAlignment() const
Definition TargetTransformInfo.h:175

llvm::MemIntrinsicCostAttributes::getDataType
Type * getDataType() const
Definition TargetTransformInfo.h:172

llvm::MemIntrinsicCostAttributes::getVariableMask
bool getVariableMask() const
Definition TargetTransformInfo.h:173

llvm::MemIntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition TargetTransformInfo.h:169

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition OptimizationRemarkEmitter.h:33

llvm::OptimizationRemarkEmitter::emit
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition OptimizationRemarkEmitter.cpp:79

llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition DiagnosticInfo.h:767

llvm::PointerType::get
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.

llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition ProfileSummaryInfo.h:42

llvm::SCEV
This class represents an analyzed expression in the program.
Definition ScalarEvolution.h:249

llvm::ScalarEvolution
The main scalar evolution driver.
Definition ScalarEvolution.h:616

llvm::ShuffleVectorInst::isZeroEltSplatMask
static LLVM_ABI bool isZeroEltSplatMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses all elements with the same value as the first element of exactly one source vector.
Definition Instructions.cpp:2008

llvm::ShuffleVectorInst::isSpliceMask
static LLVM_ABI bool isSpliceMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is a splice mask, concatenating the two inputs together and then extracting an original-width vector starting from the splice index.
Definition Instructions.cpp:2073

llvm::ShuffleVectorInst::isSelectMask
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition Instructions.cpp:2022

llvm::ShuffleVectorInst::isExtractSubvectorMask
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
Definition Instructions.cpp:2107

llvm::ShuffleVectorInst::isReverseMask
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
Definition Instructions.cpp:1988

llvm::ShuffleVectorInst::isTransposeMask
static LLVM_ABI bool isTransposeMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask is a transpose mask.
Definition Instructions.cpp:2037

llvm::ShuffleVectorInst::isInsertSubvectorMask
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
Definition Instructions.cpp:2136

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:386

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition SmallPtrSet.h:532

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition SmallVector.h:592

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:423

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:83

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:86

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1236

llvm::StackOffset
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30

llvm::StackOffset::getScalable
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:40

llvm::StackOffset::getFixed
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39

llvm::StructType::create
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition Type.cpp:683

llvm::SwitchInst
Multiway switch.
Definition Instructions.h:3341

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:266

llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition TargetLowering.h:199

llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with custom lowering.
Definition TargetLowering.h:1479

llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition TargetLoweringBase.cpp:2232

llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition TargetLowering.h:1778

llvm::TargetLoweringBase::LegalizeAction
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
Definition TargetLowering.h:203

llvm::TargetLoweringBase::Custom
@ Custom
Definition TargetLowering.h:208

llvm::TargetLoweringBase::Expand
@ Expand
Definition TargetLowering.h:206

llvm::TargetLoweringBase::Legal
@ Legal
Definition TargetLowering.h:204

llvm::TargetLoweringBase::preferSelectsOverBooleanArithmetic
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
Definition TargetLowering.h:3609

llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition TargetLowering.h:3235

llvm::TargetLoweringBase::TypeSplitVector
@ TypeSplitVector
Definition TargetLowering.h:220

llvm::TargetLoweringBase::TypeExpandInteger
@ TypeExpandInteger
Definition TargetLowering.h:216

llvm::TargetLoweringBase::TypeScalarizeScalableVector
@ TypeScalarizeScalableVector
Definition TargetLowering.h:223

llvm::TargetLoweringBase::TypeLegal
@ TypeLegal
Definition TargetLowering.h:214

llvm::TargetLoweringBase::isSuitableForJumpTable
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
Definition TargetLoweringBase.cpp:2077

llvm::TargetLoweringBase::areJTsAllowed
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
Definition TargetLowering.h:1408

llvm::TargetLoweringBase::isOperationLegalOrPromote
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
Definition TargetLowering.h:1376

llvm::TargetLoweringBase::getTruncStoreAction
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
Definition TargetLowering.h:1567

llvm::TargetLoweringBase::isOperationCustom
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
Definition TargetLowering.h:1403

llvm::TargetLoweringBase::isSuitableForBitTests
bool isSuitableForBitTests(const DenseMap< const BasicBlock *, unsigned int > &DestCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
Definition TargetLowering.h:1440

llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition TargetLowering.h:3140

llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition TargetLowering.h:1106

llvm::TargetLoweringBase::isFreeAddrSpaceCast
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
Definition TargetLoweringBase.cpp:1366

llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition TargetLowering.h:1484

llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition TargetLowering.h:1362

llvm::TargetLoweringBase::getLoadAction
LegalizeAction getLoadAction(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Definition TargetLowering.h:1507

llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition TargetLowering.h:1153

llvm::TargetLoweringBase::isLoadLegal
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
Definition TargetLowering.h:1537

llvm::TargetLoweringBase::isFAbsFree
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
Definition TargetLowering.h:3412

llvm::TargetLoweringBase::isOperationLegalOrCustomOrPromote
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition TargetLowering.h:1390

llvm::TargetLoweringBase::LegalizeKind
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Definition TargetLowering.h:234

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition TargetMachine.h:83

llvm::TargetMachine::isPositionIndependent
bool isPositionIndependent() const
Definition TargetMachine.cpp:143

llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition TargetMachine.h:132

llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition TargetMachine.h:139

llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition TargetMachine.h:259

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition TargetSubtargetInfo.h:67

llvm::TargetSubtargetInfo::getInlineMustMatchFeatures
virtual const FeatureBitset & getInlineMustMatchFeatures() const =0
Target features where all mismatches prevent inlining.

llvm::TargetSubtargetInfo::getInlineInverseFeatures
virtual const FeatureBitset & getInlineInverseFeatures() const =0
Target features where the callee may have an additional feature, instead of the caller.

llvm::TargetSubtargetInfo::getInlineIgnoreFeatures
virtual const FeatureBitset & getInlineIgnoreFeatures() const =0
Target features to ignore for inline compatibility check.

llvm::TargetTransformInfoImplBase::isProfitableLSRChainElement
virtual bool isProfitableLSRChainElement(Instruction *I) const
Definition TargetTransformInfoImpl.h:340

llvm::TargetTransformInfoImplBase::getPreferredTailFoldingStyle
virtual TailFoldingStyle getPreferredTailFoldingStyle() const
Definition TargetTransformInfoImpl.h:281

llvm::TargetTransformInfoImplBase::getDataLayout
virtual const DataLayout & getDataLayout() const
Definition TargetTransformInfoImpl.h:51

llvm::TargetTransformInfoImplBase::getCacheAssociativity
virtual std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition TargetTransformInfoImpl.h:693

llvm::TargetTransformInfoImplBase::simplifyDemandedVectorEltsIntrinsic
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition TargetTransformInfoImpl.h:297

llvm::TargetTransformInfoImplBase::shouldDropLSRSolutionIfLessProfitable
virtual bool shouldDropLSRSolutionIfLessProfitable() const
Definition TargetTransformInfoImpl.h:338

llvm::TargetTransformInfoImplBase::isHardwareLoopProfitable
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Definition TargetTransformInfoImpl.h:268

llvm::TargetTransformInfoImplBase::simplifyDemandedUseBitsIntrinsic
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Definition TargetTransformInfoImpl.h:291

llvm::TargetTransformInfoImplBase::preferTailFoldingOverEpilogue
virtual bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const
Definition TargetTransformInfoImpl.h:277

llvm::TargetTransformInfoImplBase::instCombineIntrinsic
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition TargetTransformInfoImpl.h:286

llvm::TargetTransformInfoImplBase::getEpilogueVectorizationMinVF
virtual unsigned getEpilogueVectorizationMinVF() const
Definition TargetTransformInfoImpl.h:275

llvm::TargetTransformInfoImplBase::getScalarizationOverhead
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
Definition TargetTransformInfoImpl.h:502

llvm::TargetTransformInfoImplBase::isLoweredToCall
virtual bool isLoweredToCall(const Function *F) const
Definition TargetTransformInfoImpl.h:221

llvm::TargetTransformInfoImplBase::getArithmeticInstrCost
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
Definition TargetTransformInfoImpl.h:730

llvm::TargetTransformInfoImplBase::getCFInstrCost
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
Definition TargetTransformInfoImpl.h:836

llvm::TargetTransformInfoImplBase::isLSRCostLess
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
Definition TargetTransformInfoImpl.h:328

llvm::TargetTransformInfoImplBase::getCastInstrCost
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition TargetTransformInfoImpl.h:785

llvm::TargetTransformInfoImplBase::getIntrinsicInstrCost
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition TargetTransformInfoImpl.h:920

llvm::TargetTransformInfoImplBase::getCmpSelInstrCost
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, const Instruction *I) const
Definition TargetTransformInfoImpl.h:846

llvm::TargetTransformInfoImplBase::DL
const DataLayout & DL
Definition TargetTransformInfoImpl.h:40

llvm::TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR
virtual bool isNumRegsMajorCostOfLSR() const
Definition TargetTransformInfoImpl.h:336

llvm::TargetTransformInfoImplCRTPBase::TargetTransformInfoImplCRTPBase
TargetTransformInfoImplCRTPBase(const DataLayout &DL)
Definition TargetTransformInfoImpl.h:1336

llvm::TargetTransformInfoImplCRTPBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
Definition TargetTransformInfoImpl.h:1339

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:268

llvm::TargetTransformInfo::VectorInstrContext
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
Definition TargetTransformInfo.h:1068

llvm::TargetTransformInfo::VectorInstrContext::None
@ None
The insert/extract is not used with a load/store.
Definition TargetTransformInfo.h:1069

llvm::TargetTransformInfo::getOperandInfo
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition TargetTransformInfo.cpp:936

llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition TargetTransformInfo.h:331

llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition TargetTransformInfo.h:332

llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition TargetTransformInfo.h:334

llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of instruction.
Definition TargetTransformInfo.h:333

llvm::TargetTransformInfo::OP_None
@ OP_None
Definition TargetTransformInfo.h:1277

llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition TargetTransformInfo.h:1278

llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition TargetTransformInfo.h:1723

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition TargetTransformInfo.h:1352

llvm::TargetTransformInfo::PartialReductionExtendKind
PartialReductionExtendKind
Definition TargetTransformInfo.h:270

llvm::TargetTransformInfo::PR_None
@ PR_None
Definition TargetTransformInfo.h:271

llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition TargetTransformInfo.h:359

llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition TargetTransformInfo.h:358

llvm::TargetTransformInfo::getOpcodeForPartialReductionExtendKind
static LLVM_ABI Instruction::CastOps getOpcodeForPartialReductionExtendKind(PartialReductionExtendKind Kind)
Get the cast opcode for an extension kind.
Definition TargetTransformInfo.cpp:1066

llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition TargetTransformInfo.h:1884

llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition TargetTransformInfo.h:1885

llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition TargetTransformInfo.h:1888

llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition TargetTransformInfo.h:1889

llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition TargetTransformInfo.h:1887

llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition TargetTransformInfo.h:1886

llvm::TargetTransformInfo::getVectorInstrContextHint
static LLVM_ABI VectorInstrContext getVectorInstrContextHint(const Instruction *I)
Calculates a VectorInstrContext from I.
Definition TargetTransformInfo.cpp:652

llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition TargetTransformInfo.h:1248

llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition TargetTransformInfo.h:1255

llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition TargetTransformInfo.h:1251

llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition TargetTransformInfo.h:1259

llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition TargetTransformInfo.h:1254

llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition TargetTransformInfo.h:1261

llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition TargetTransformInfo.h:1249

llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition TargetTransformInfo.h:1257

llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition TargetTransformInfo.h:1250

llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
Definition TargetTransformInfo.h:1256

llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition TargetTransformInfo.h:1586

llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
Definition TargetTransformInfo.h:1588

llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition TargetTransformInfo.h:1271

llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition TargetTransformInfo.h:1269

llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition TargetTransformInfo.h:1412

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47

llvm::Triple::aarch64
@ aarch64
Definition Triple.h:54

llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition Triple.h:425

llvm::Triple::isArch64Bit
LLVM_ABI bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition Triple.cpp:2073

llvm::Triple::isOSDarwin
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:634

llvm::TypeSize
Definition TypeSize.h:332

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288

llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282

llvm::Type::getPointerAddressSpace
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition DerivedTypes.h:839

llvm::Type::getInt8Ty
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307

llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368

llvm::Type::getWithNewBitWidth
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
Definition DerivedTypes.h:832

llvm::Type::getWithNewType
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
Definition DerivedTypes.h:826

llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232

llvm::Type::getInt1Ty
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:306

llvm::Type::getIntNTy
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313

llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227

llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition Type.h:397

llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:207

llvm::VPBinOpIntrinsic::isVPBinOp
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
Definition IntrinsicInst.cpp:742

llvm::VPCastIntrinsic::isVPCast
static LLVM_ABI bool isVPCast(Intrinsic::ID ID)
Definition IntrinsicInst.cpp:725

llvm::VPCmpIntrinsic::isVPCmp
static LLVM_ABI bool isVPCmp(Intrinsic::ID ID)
Definition IntrinsicInst.cpp:732

llvm::VPIntrinsic::getFunctionalOpcodeForVP
static LLVM_ABI std::optional< unsigned > getFunctionalOpcodeForVP(Intrinsic::ID ID)
Definition IntrinsicInst.cpp:505

llvm::VPIntrinsic::getFunctionalIntrinsicIDForVP
static LLVM_ABI std::optional< Intrinsic::ID > getFunctionalIntrinsicIDForVP(Intrinsic::ID ID)
Definition IntrinsicInst.cpp:524

llvm::VPIntrinsic::isVPIntrinsic
static LLVM_ABI bool isVPIntrinsic(Intrinsic::ID)
Definition IntrinsicInst.cpp:486

llvm::VPReductionIntrinsic::isVPReduction
static LLVM_ABI bool isVPReduction(Intrinsic::ID ID)
Definition IntrinsicInst.cpp:702

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:490

llvm::VectorType::getHalfElementsVectorType
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
Definition DerivedTypes.h:592

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.

llvm::VectorType::getElementType
Type * getElementType() const
Definition DerivedTypes.h:523

llvm::cl::opt
Definition CommandLine.h:1472

llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200

llvm::details::FixedOrScalableQuantity< TypeSize, uint64_t >::isKnownLT
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168

llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165

llvm::details::FixedOrScalableQuantity::divideCoefficientBy
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252

uint64_t

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

TargetMachine.h

llvm::AArch64CC::LT
@ LT
Definition AArch64BaseInfo.h:301

llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition AMDGPUMetadata.h:396

llvm::APIntOps::ScaleBitMask
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition APInt.cpp:3040

llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::CodeModel::Medium
@ Medium
Definition CodeGen.h:31

llvm::CodeModel::Large
@ Large
Definition CodeGen.h:31

llvm::ISD
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
Definition ISDOpcodes.h:24

llvm::ISD::STORE
@ STORE
Definition ISDOpcodes.h:1183

llvm::ISD::LRINT
@ LRINT
Definition ISDOpcodes.h:1083

llvm::ISD::FLOG10
@ FLOG10
Definition ISDOpcodes.h:1070

llvm::ISD::MASKED_SREM
@ MASKED_SREM
Definition ISDOpcodes.h:1644

llvm::ISD::SREM
@ SREM
Definition ISDOpcodes.h:269

llvm::ISD::UDIV
@ UDIV
Definition ISDOpcodes.h:268

llvm::ISD::UMIN
@ UMIN
Definition ISDOpcodes.h:729

llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:787

llvm::ISD::FPOW
@ FPOW
Definition ISDOpcodes.h:1056

llvm::ISD::SMULFIX
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394

llvm::ISD::UADDO
@ UADDO
Definition ISDOpcodes.h:349

llvm::ISD::FTRUNC
@ FTRUNC
Definition ISDOpcodes.h:1075

llvm::ISD::SDIV
@ SDIV
Definition ISDOpcodes.h:267

llvm::ISD::FMAXNUM_IEEE
@ FMAXNUM_IEEE
Definition ISDOpcodes.h:1117

llvm::ISD::LLRINT
@ LLRINT
Definition ISDOpcodes.h:1084

llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition ISDOpcodes.h:432

llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518

llvm::ISD::UMULFIX
@ UMULFIX
Definition ISDOpcodes.h:395

llvm::ISD::FMODF
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
Definition ISDOpcodes.h:1139

llvm::ISD::FATAN2
@ FATAN2
FATAN2 - atan2, inspired by libm.
Definition ISDOpcodes.h:1061

llvm::ISD::FABS
@ FABS
Definition ISDOpcodes.h:1044

llvm::ISD::FSINCOSPI
@ FSINCOSPI
FSINCOSPI - Compute both the sine and cosine times pi more accurately than FSINCOS(pi*x),...
Definition ISDOpcodes.h:1135

llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition ISDOpcodes.h:1077

llvm::ISD::MASKED_SDIV
@ MASKED_SDIV
Definition ISDOpcodes.h:1642

llvm::ISD::FCOSH
@ FCOSH
Definition ISDOpcodes.h:1054

llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417

llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747

llvm::ISD::FEXP10
@ FEXP10
Definition ISDOpcodes.h:1073

llvm::ISD::UDIVREM
@ UDIVREM
Definition ISDOpcodes.h:281

llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280

llvm::ISD::FACOS
@ FACOS
Definition ISDOpcodes.h:1051

llvm::ISD::MASKED_UREM
@ MASKED_UREM
Definition ISDOpcodes.h:1643

llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition ISDOpcodes.h:1123

llvm::ISD::FATAN
@ FATAN
Definition ISDOpcodes.h:1052

llvm::ISD::FFLOOR
@ FFLOOR
Definition ISDOpcodes.h:1080

llvm::ISD::CLMUL
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:778

llvm::ISD::FLDEXP
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition ISDOpcodes.h:1059

llvm::ISD::UCMP
@ UCMP
Definition ISDOpcodes.h:736

llvm::ISD::LLROUND
@ LLROUND
Definition ISDOpcodes.h:1082

llvm::ISD::USUBO
@ USUBO
Definition ISDOpcodes.h:353

llvm::ISD::FLOG2
@ FLOG2
Definition ISDOpcodes.h:1069

llvm::ISD::UADDSAT
@ UADDSAT
Definition ISDOpcodes.h:366

llvm::ISD::FASIN
@ FASIN
Definition ISDOpcodes.h:1050

llvm::ISD::FMAXNUM
@ FMAXNUM
Definition ISDOpcodes.h:1101

llvm::ISD::FRINT
@ FRINT
Definition ISDOpcodes.h:1076

llvm::ISD::FSINCOS
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition ISDOpcodes.h:1131

llvm::ISD::CTTZ
@ CTTZ
Definition ISDOpcodes.h:788

llvm::ISD::SSUBO
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352

llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition ISDOpcodes.h:1203

llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition ISDOpcodes.h:1207

llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541

llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374

llvm::ISD::UMULO
@ UMULO
Definition ISDOpcodes.h:357

llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:804

llvm::ISD::FTANH
@ FTANH
Definition ISDOpcodes.h:1055

llvm::ISD::FSHL
@ FSHL
Definition ISDOpcodes.h:774

llvm::ISD::SADDO
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348

llvm::ISD::FSHR
@ FSHR
Definition ISDOpcodes.h:775

llvm::ISD::FROUND
@ FROUND
Definition ISDOpcodes.h:1078

llvm::ISD::USUBSAT
@ USUBSAT
Definition ISDOpcodes.h:375

llvm::ISD::FMINNUM_IEEE
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition ISDOpcodes.h:1116

llvm::ISD::FCOS
@ FCOS
Definition ISDOpcodes.h:1048

llvm::ISD::FP_TO_UINT_SAT
@ FP_TO_UINT_SAT
Definition ISDOpcodes.h:954

llvm::ISD::CTPOP
@ CTPOP
Definition ISDOpcodes.h:790

llvm::ISD::FTAN
@ FTAN
Definition ISDOpcodes.h:1049

llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
Definition ISDOpcodes.h:1100

llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356

llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727

llvm::ISD::MASKED_UDIV
@ MASKED_UDIV
Masked vector arithmetic that returns poison on disabled lanes.
Definition ISDOpcodes.h:1641

llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:813

llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition ISDOpcodes.h:1079

llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition ISDOpcodes.h:1122

llvm::ISD::FLOG
@ FLOG
Definition ISDOpcodes.h:1068

llvm::ISD::SCMP
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition ISDOpcodes.h:735

llvm::ISD::UREM
@ UREM
Definition ISDOpcodes.h:270

llvm::ISD::FSIN
@ FSIN
Definition ISDOpcodes.h:1047

llvm::ISD::FEXP
@ FEXP
Definition ISDOpcodes.h:1071

llvm::ISD::FCEIL
@ FCEIL
Definition ISDOpcodes.h:1074

llvm::ISD::MUL
@ MUL
Definition ISDOpcodes.h:266

llvm::ISD::FSINH
@ FSINH
Definition ISDOpcodes.h:1053

llvm::ISD::LROUND
@ LROUND
Definition ISDOpcodes.h:1081

llvm::ISD::CTLZ
@ CTLZ
Definition ISDOpcodes.h:789

llvm::ISD::FMAXIMUMNUM
@ FMAXIMUMNUM
Definition ISDOpcodes.h:1128

llvm::ISD::FSQRT
@ FSQRT
Definition ISDOpcodes.h:1045

llvm::ISD::FP_TO_SINT_SAT
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:953

llvm::ISD::BITREVERSE
@ BITREVERSE
Definition ISDOpcodes.h:791

llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534

llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365

llvm::ISD::FEXP2
@ FEXP2
Definition ISDOpcodes.h:1072

llvm::ISD::SMAX
@ SMAX
Definition ISDOpcodes.h:728

llvm::ISD::UMAX
@ UMAX
Definition ISDOpcodes.h:730

llvm::ISD::FMINIMUMNUM
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition ISDOpcodes.h:1127

llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition ISDOpcodes.h:1738

llvm::ISD::POST_DEC
@ POST_DEC
Definition ISDOpcodes.h:1738

llvm::ISD::PRE_DEC
@ PRE_DEC
Definition ISDOpcodes.h:1738

llvm::ISD::POST_INC
@ POST_INC
Definition ISDOpcodes.h:1738

llvm::ISD::PRE_INC
@ PRE_INC
Definition ISDOpcodes.h:1738

llvm::ISD::UNINDEXED
@ UNINDEXED
Definition ISDOpcodes.h:1738

llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition ISDOpcodes.h:1769

llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition ISDOpcodes.h:1769

llvm::ISD::EXTLOAD
@ EXTLOAD
Definition ISDOpcodes.h:1769

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::Intrinsic::isTargetIntrinsic
LLVM_ABI bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
Definition Intrinsics.cpp:669

llvm::NVPTXAS::AddressSpace
AddressSpace
Definition NVPTXAddrSpace.h:21

llvm::RTLIB::getSINCOSPI
LLVM_ABI Libcall getSINCOSPI(EVT RetVT)
getSINCOSPI - Return the SINCOSPI_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:612

llvm::RTLIB::getMODF
LLVM_ABI Libcall getMODF(EVT VT)
getMODF - Return the MODF_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:685

llvm::RTLIB::getSINCOS
LLVM_ABI Libcall getSINCOS(EVT RetVT)
getSINCOS - Return the SINCOS_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:581

llvm::SI
Definition SIInstrInfo.h:1915

llvm::ThreadModel::Single
@ Single
Definition TargetOptions.h:52

llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition OptimizationRemarkEmitter.h:139

llvm::sandboxir::Instruction
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739

llvm::getMinMaxReductionIntrinsicOp
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition LoopUtils.cpp:1212

llvm::zip_equal
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840

llvm::Cost
InstructionCost Cost
Definition FunctionSpecialization.h:103

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554

llvm::toScalarizedTy
Type * toScalarizedTy(Type *Ty)
A helper for converting vectorized types to scalarized (non-vector) types.
Definition VectorTypeUtils.h:65

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::dyn_cast_if_present
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732

llvm::getArithmeticReductionInstruction
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
Definition LoopUtils.cpp:1160

llvm::isVectorizedTy
bool isVectorizedTy(Type *Ty)
Returns true if Ty is a vector type or a struct of vector types where all vector types share the same...
Definition VectorTypeUtils.h:73

llvm::FloatStyle::Exponent
@ Exponent
Definition NativeFormatting.h:18

llvm::concat
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1151

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753

llvm::has_single_bit
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746

llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::getVectorizedTypeVF
ElementCount getVectorizedTypeVF(Type *Ty)
Returns the number of vector elements for a vectorized type.
Definition VectorTypeUtils.h:100

llvm::getVScaleRange
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
Definition ValueTracking.cpp:1272

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1162

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::PoisonMaskElem
constexpr int PoisonMaskElem
Definition Instructions.h:1943

llvm::divideCeil
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394

llvm::RecurKind::UMin
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
Definition IVDescriptors.h:47

llvm::RecurKind::UMax
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
Definition IVDescriptors.h:48

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:25

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:219

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::getContainedTypes
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
Definition VectorTypeUtils.h:93

llvm::TailFoldingStyle
TailFoldingStyle
Definition TargetTransformInfo.h:224

llvm::Data
@ Data
Definition SIMachineScheduler.h:55

llvm::PartialUnrollingThreshold
LLVM_ABI cl::opt< unsigned > PartialUnrollingThreshold

llvm::isVectorizedStructTy
LLVM_ABI bool isVectorizedStructTy(StructType *StructTy)
Returns true if StructTy is an unpacked literal struct where all elements are vectors of matching ele...
Definition VectorTypeUtils.cpp:44

N
#define N

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::EVT
Extended Value Type.
Definition ValueTypes.h:35

llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145

llvm::EVT::getVectorElementCount
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373

llvm::EVT::getEVT
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition ValueTypes.cpp:307

llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339

llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61

llvm::EVT::getTypeForEVT
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition ValueTypes.cpp:218

llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition TargetTransformInfo.h:103

llvm::KnownBits
Definition KnownBits.h:24

llvm::RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument
static LLVM_ABI bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
Definition RuntimeLibcalls.cpp:481

llvm::TailFoldingInfo
Definition TargetTransformInfo.h:253

llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Definition TargetLowering.h:2981

llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition TargetLowering.h:2983

llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition TargetLowering.h:2982

llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition TargetLowering.h:2984

llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition TargetLowering.h:2985

llvm::TargetLoweringBase::AddrMode::ScalableOffset
int64_t ScalableOffset
Definition TargetLowering.h:2986

llvm::TargetTransformInfo::LSRCost
Definition TargetTransformInfo.h:624

llvm::TargetTransformInfo::OperandValueInfo
Definition TargetTransformInfo.h:1285

llvm::TargetTransformInfo::OperandValueInfo::isConstant
bool isConstant() const
Definition TargetTransformInfo.h:1289

llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition TargetTransformInfo.h:1286

llvm::TargetTransformInfo::PeelingPreferences
Definition TargetTransformInfo.h:762

llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition TargetTransformInfo.h:768

llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition TargetTransformInfo.h:770

llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling based on profile.
Definition TargetTransformInfo.h:775

llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition TargetTransformInfo.h:766

llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition TargetTransformInfo.h:638

llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition TargetTransformInfo.h:709

llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition TargetTransformInfo.h:667

llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition TargetTransformInfo.h:692

llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition TargetTransformInfo.h:663

llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition TargetTransformInfo.h:699

llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition TargetTransformInfo.h:695

llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition TargetTransformInfo.h:660