doxygen/PPCTargetTransformInfo_8cpp_source.html

//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#include "PPCTargetTransformInfo.h"

#include "llvm/Analysis/CodeMetrics.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/CodeGen/BasicTTIImpl.h"

#include "llvm/CodeGen/TargetLowering.h"

#include "llvm/CodeGen/TargetSchedule.h"

#include "llvm/IR/IntrinsicsPowerPC.h"

#include "llvm/IR/ProfDataUtils.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Transforms/InstCombine/InstCombiner.h"

#include "llvm/Transforms/Utils/Local.h"

#include <optional>


using namespace llvm;


#define DEBUG_TYPE "ppctti"


static cl::opt<bool> PPCEVL("ppc-evl",

                            cl::desc("Allow EVL type vp.load/vp.store"),

                            cl::init(false), cl::Hidden);


static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",

                             cl::desc("Allow vp.load and vp.store for pwr9"),

                             cl::init(false), cl::Hidden);


static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",

cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);


static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",

cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);


static cl::opt<bool>

EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),

                cl::desc("Enable using coldcc calling conv for cold "

                         "internal functions"));


static cl::opt<bool>

LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),

               cl::desc("Do not add instruction count to lsr cost model"));


// The latency of mtctr is only justified if there are more than 4

// comparisons that will be removed as a result.

static cl::opt<unsigned>

SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,

                      cl::desc("Loops with a constant trip count smaller than "

                               "this value will not use the count register."));


//===----------------------------------------------------------------------===//

//

// PPC cost model.

//

//===----------------------------------------------------------------------===//


TargetTransformInfo::PopcntSupportKind


PPCTTIImpl::getPopcntSupport(unsigned TyWidth) const {

  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");

  if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)

    return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?

             TTI::PSK_SlowHardware : TTI::PSK_FastHardware;

  return TTI::PSK_Software;

}


std::optional<Instruction *>


PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

  Intrinsic::ID IID = II.getIntrinsicID();

  switch (IID) {

  default:

    break;

  case Intrinsic::ppc_altivec_lvx:

  case Intrinsic::ppc_altivec_lvxl:

    // Turn PPC lvx -> load if the pointer is known aligned.

    if (getOrEnforceKnownAlignment(

            II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,

            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {

      Value *Ptr = II.getArgOperand(0);

      return new LoadInst(II.getType(), Ptr, "", false, Align(16));

    }

    break;

  case Intrinsic::ppc_vsx_lxvw4x:

  case Intrinsic::ppc_vsx_lxvd2x: {

    // Turn PPC VSX loads into normal loads.

    Value *Ptr = II.getArgOperand(0);

    return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));

  }

  case Intrinsic::ppc_altivec_stvx:

  case Intrinsic::ppc_altivec_stvxl:

    // Turn stvx -> store if the pointer is known aligned.

    if (getOrEnforceKnownAlignment(

            II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,

            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {

      Value *Ptr = II.getArgOperand(1);

      return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));

    }

    break;

  case Intrinsic::ppc_vsx_stxvw4x:

  case Intrinsic::ppc_vsx_stxvd2x: {

    // Turn PPC VSX stores into normal stores.

    Value *Ptr = II.getArgOperand(1);

    return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));

  }

  case Intrinsic::ppc_altivec_vperm:

    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.

    // Note that ppc_altivec_vperm has a big-endian bias, so when creating

    // a vectorshuffle for little endian, we must undo the transformation

    // performed on vec_perm in altivec.h.  That is, we must complement

    // the permutation mask with respect to 31 and reverse the order of

    // V1 and V2.

    if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {

      assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&

             "Bad type for intrinsic!");


      // Check that all of the elements are integer constants or undefs.

      bool AllEltsOk = true;

      for (unsigned I = 0; I != 16; ++I) {

        Constant *Elt = Mask->getAggregateElement(I);

        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {

          AllEltsOk = false;

          break;

        }

      }


      if (AllEltsOk) {

        // Cast the input vectors to byte vectors.

        Value *Op0 =

            IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());

        Value *Op1 =

            IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());

        Value *Result = PoisonValue::get(Op0->getType());


        // Only extract each element once.

        Value *ExtractedElts[32];

        memset(ExtractedElts, 0, sizeof(ExtractedElts));


        for (unsigned I = 0; I != 16; ++I) {

          if (isa<UndefValue>(Mask->getAggregateElement(I)))

            continue;

          unsigned Idx =

              cast<ConstantInt>(Mask->getAggregateElement(I))->getZExtValue();

          Idx &= 31; // Match the hardware behavior.

          if (DL.isLittleEndian())

            Idx = 31 - Idx;


          if (!ExtractedElts[Idx]) {

            Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;

            Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;

            ExtractedElts[Idx] = IC.Builder.CreateExtractElement(

                Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));

          }


          // Insert this value into the result vector.

          Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],

                                                  IC.Builder.getInt32(I));

        }

        return CastInst::Create(Instruction::BitCast, Result, II.getType());

      }

    }

    break;

  }

  return std::nullopt;

}


InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,

                                          TTI::TargetCostKind CostKind) const {

  if (DisablePPCConstHoist)

    return BaseT::getIntImmCost(Imm, Ty, CostKind);


  assert(Ty->isIntegerTy());


  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (BitSize == 0)

    return ~0U;


  if (Imm == 0)

    return TTI::TCC_Free;


  if (Imm.getBitWidth() <= 64) {

    if (isInt<16>(Imm.getSExtValue()))

      return TTI::TCC_Basic;


    if (isInt<32>(Imm.getSExtValue())) {

      // A constant that can be materialized using lis.

      if ((Imm.getZExtValue() & 0xFFFF) == 0)

        return TTI::TCC_Basic;


      return 2 * TTI::TCC_Basic;

    }

  }


  return 4 * TTI::TCC_Basic;

}


InstructionCost


PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,

                                const APInt &Imm, Type *Ty,

                                TTI::TargetCostKind CostKind) const {

  if (DisablePPCConstHoist)

    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);


  assert(Ty->isIntegerTy());


  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (BitSize == 0)

    return ~0U;


  switch (IID) {

  default:

    return TTI::TCC_Free;

  case Intrinsic::sadd_with_overflow:

  case Intrinsic::uadd_with_overflow:

  case Intrinsic::ssub_with_overflow:

  case Intrinsic::usub_with_overflow:

    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))

      return TTI::TCC_Free;

    break;

  case Intrinsic::experimental_stackmap:

    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

      return TTI::TCC_Free;

    break;

  case Intrinsic::experimental_patchpoint_void:

  case Intrinsic::experimental_patchpoint:

    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

      return TTI::TCC_Free;

    break;

  }

  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);

}


InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,

                                              const APInt &Imm, Type *Ty,

                                              TTI::TargetCostKind CostKind,

                                              Instruction *Inst) const {

  if (DisablePPCConstHoist)

    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);


  assert(Ty->isIntegerTy());


  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (BitSize == 0)

    return ~0U;


  unsigned ImmIdx = ~0U;

  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,

       ZeroFree = false;

  switch (Opcode) {

  default:

    return TTI::TCC_Free;

  case Instruction::GetElementPtr:

    // Always hoist the base address of a GetElementPtr. This prevents the

    // creation of new constants for every base constant that gets constant

    // folded with the offset.

    if (Idx == 0)

      return 2 * TTI::TCC_Basic;

    return TTI::TCC_Free;

  case Instruction::And:

    RunFree = true; // (for the rotate-and-mask instructions)

    [[fallthrough]];

  case Instruction::Add:

  case Instruction::Or:

  case Instruction::Xor:

    ShiftedFree = true;

    [[fallthrough]];

  case Instruction::Sub:

  case Instruction::Mul:

  case Instruction::Shl:

  case Instruction::LShr:

  case Instruction::AShr:

    ImmIdx = 1;

    break;

  case Instruction::ICmp:

    UnsignedFree = true;

    ImmIdx = 1;

    // Zero comparisons can use record-form instructions.

    [[fallthrough]];

  case Instruction::Select:

    ZeroFree = true;

    break;

  case Instruction::PHI:

  case Instruction::Call:

  case Instruction::Ret:

  case Instruction::Load:

  case Instruction::Store:

    break;

  }


  if (ZeroFree && Imm == 0)

    return TTI::TCC_Free;


  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {

    if (isInt<16>(Imm.getSExtValue()))

      return TTI::TCC_Free;


    if (RunFree) {

      if (Imm.getBitWidth() <= 32 &&

          (isShiftedMask_32(Imm.getZExtValue()) ||

           isShiftedMask_32(~Imm.getZExtValue())))

        return TTI::TCC_Free;


      if (ST->isPPC64() &&

          (isShiftedMask_64(Imm.getZExtValue()) ||

           isShiftedMask_64(~Imm.getZExtValue())))

        return TTI::TCC_Free;

    }


    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))

      return TTI::TCC_Free;


    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)

      return TTI::TCC_Free;

  }


  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);

}


// Check if the current Type is an MMA vector type. Valid MMA types are

// v256i1 and v512i1 respectively.


static bool isMMAType(Type *Ty) {

  return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&

         (Ty->getPrimitiveSizeInBits() > 128);

}


InstructionCost


PPCTTIImpl::getInstructionCost(const User *U, ArrayRef<const Value *> Operands,

                               TTI::TargetCostKind CostKind) const {

  // We already implement getCastInstrCost and getMemoryOpCost where we perform

  // the vector adjustment there.

  if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))

    return BaseT::getInstructionCost(U, Operands, CostKind);


  if (U->getType()->isVectorTy()) {

    // Instructions that need to be split should cost more.

    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());

    return LT.first * BaseT::getInstructionCost(U, Operands, CostKind);

  }


  return BaseT::getInstructionCost(U, Operands, CostKind);

}


bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,

                                          AssumptionCache &AC,

                                          TargetLibraryInfo *LibInfo,

                                          HardwareLoopInfo &HWLoopInfo) const {

  const PPCTargetMachine &TM = ST->getTargetMachine();

  TargetSchedModel SchedModel;

  SchedModel.init(ST);


  // FIXME: Sure there is no other way to get TTI? This should be cheap though.

  TargetTransformInfo TTI =

      TM.getTargetTransformInfo(*L->getHeader()->getParent());


  // Do not convert small short loops to CTR loop.

  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);

  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {

    SmallPtrSet<const Value *, 32> EphValues;

    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);

    CodeMetrics Metrics;

    for (BasicBlock *BB : L->blocks())

      Metrics.analyzeBasicBlock(BB, TTI, EphValues);

    // 6 is an approximate latency for the mtctr instruction.

    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))

      return false;

  }


  // Check that there is no hardware loop related intrinsics in the loop.

  for (auto *BB : L->getBlocks())

    for (auto &I : *BB)

      if (auto *Call = dyn_cast<IntrinsicInst>(&I))

        if (Call->getIntrinsicID() == Intrinsic::set_loop_iterations ||

            Call->getIntrinsicID() == Intrinsic::loop_decrement)

          return false;


  SmallVector<BasicBlock*, 4> ExitingBlocks;

  L->getExitingBlocks(ExitingBlocks);


  // If there is an exit edge known to be frequently taken,

  // we should not transform this loop.

  for (auto &BB : ExitingBlocks) {

    Instruction *TI = BB->getTerminator();

    if (!TI) continue;


    if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {

      uint64_t TrueWeight = 0, FalseWeight = 0;

      if (!extractBranchWeights(*BI, TrueWeight, FalseWeight))

        continue;


      // If the exit path is more frequent than the loop path,

      // we return here without further analysis for this loop.

      bool TrueIsExit = !L->contains(BI->getSuccessor(0));

      if (( TrueIsExit && FalseWeight < TrueWeight) ||

          (!TrueIsExit && FalseWeight > TrueWeight))

        return false;

    }

  }


  LLVMContext &C = L->getHeader()->getContext();

  HWLoopInfo.CountType = TM.isPPC64() ?

    Type::getInt64Ty(C) : Type::getInt32Ty(C);

  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);

  return true;

}


void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

                                         TTI::UnrollingPreferences &UP,

                                         OptimizationRemarkEmitter *ORE) const {

  if (ST->getCPUDirective() == PPC::DIR_A2) {

    // The A2 is in-order with a deep pipeline, and concatenation unrolling

    // helps expose latency-hiding opportunities to the instruction scheduler.

    UP.Partial = UP.Runtime = true;


    // We unroll a lot on the A2 (hundreds of instructions), and the benefits

    // often outweigh the cost of a division to compute the trip count.

    UP.AllowExpensiveTripCount = true;

  }


  BaseT::getUnrollingPreferences(L, SE, UP, ORE);

}


void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,

                                       TTI::PeelingPreferences &PP) const {

  BaseT::getPeelingPreferences(L, SE, PP);

}


// This function returns true to allow using coldcc calling convention.

// Returning true results in coldcc being used for functions which are cold at

// all call sites when the callers of the functions are not calling any other

// non coldcc functions.


bool PPCTTIImpl::useColdCCForColdCall(Function &F) const {

  return EnablePPCColdCC;

}


bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) const {

  // On the A2, always unroll aggressively.

  if (ST->getCPUDirective() == PPC::DIR_A2)

    return true;


  return LoopHasReductions;

}


PPCTTIImpl::TTI::MemCmpExpansionOptions


PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {

  TTI::MemCmpExpansionOptions Options;

  if (getST()->hasAltivec())

    Options.LoadSizes = {16, 8, 4, 2, 1};

  else

    Options.LoadSizes = {8, 4, 2, 1};


  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

  return Options;

}


bool PPCTTIImpl::enableInterleavedAccessVectorization() const { return true; }


unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {

  assert(ClassID == GPRRC || ClassID == FPRRC ||

         ClassID == VRRC || ClassID == VSXRC);

  if (ST->hasVSX()) {

    assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);

    return ClassID == VSXRC ? 64 : 32;

  }

  assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);

  return 32;

}


unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {

  if (Vector)

    return ST->hasVSX() ? VSXRC : VRRC;

  if (Ty &&

      (Ty->getScalarType()->isFloatTy() || Ty->getScalarType()->isDoubleTy()))

    return ST->hasVSX() ? VSXRC : FPRRC;

  if (Ty && (Ty->getScalarType()->isFP128Ty() ||

             Ty->getScalarType()->isPPC_FP128Ty()))

    return VRRC;

  if (Ty && Ty->getScalarType()->isHalfTy())

    return VSXRC;

  return GPRRC;

}


const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {


  switch (ClassID) {

    default:

      llvm_unreachable("unknown register class");

      return "PPC::unknown register class";

    case GPRRC:       return "PPC::GPRRC";

    case FPRRC:       return "PPC::FPRRC";

    case VRRC:        return "PPC::VRRC";

    case VSXRC:       return "PPC::VSXRC";

  }

}


TypeSize


PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {

  switch (K) {

  case TargetTransformInfo::RGK_Scalar:

    return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);

  case TargetTransformInfo::RGK_FixedWidthVector:

    return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);

  case TargetTransformInfo::RGK_ScalableVector:

    return TypeSize::getScalable(0);

  }


  llvm_unreachable("Unsupported register kind");

}


unsigned PPCTTIImpl::getCacheLineSize() const {

  // Starting with P7 we have a cache line size of 128.

  unsigned Directive = ST->getCPUDirective();

  // Assume that Future CPU has the same cache line size as the others.

  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||

      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||

      Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)

    return 128;


  // On other processors return a default of 64 bytes.

  return 64;

}


unsigned PPCTTIImpl::getPrefetchDistance() const {

  return 300;

}


unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) const {

  unsigned Directive = ST->getCPUDirective();

  // The 440 has no SIMD support, but floating-point instructions

  // have a 5-cycle latency, so unroll by 5x for latency hiding.

  if (Directive == PPC::DIR_440)

    return 5;


  // The A2 has no SIMD support, but floating-point instructions

  // have a 6-cycle latency, so unroll by 6x for latency hiding.

  if (Directive == PPC::DIR_A2)

    return 6;


  // FIXME: For lack of any better information, do no harm...

  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)

    return 1;


  // For P7 and P8, floating-point instructions have a 6-cycle latency and

  // there are two execution units, so unroll by 12x for latency hiding.

  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready

  // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready

  // Assume that future is the same as the others.

  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||

      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||

      Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)

    return 12;


  // For most things, modern systems have two execution units (and

  // out-of-order execution).

  return 2;

}


// Returns a cost adjustment factor to adjust the cost of vector instructions

// on targets which there is overlap between the vector and scalar units,

// thereby reducing the overall throughput of vector code wrt. scalar code.

// An invalid instruction cost is returned if the type is an MMA vector type.


InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,

                                                       Type *Ty1,

                                                       Type *Ty2) const {

  // If the vector type is of an MMA type (v256i1, v512i1), an invalid

  // instruction cost is returned. This is to signify to other cost computing

  // functions to return the maximum instruction cost in order to prevent any

  // opportunities for the optimizer to produce MMA types within the IR.

  if (isMMAType(Ty1))

    return InstructionCost::getInvalid();


  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())

    return InstructionCost(1);


  std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);

  // If type legalization involves splitting the vector, we don't want to

  // double the cost at every step - only the last step.

  if (LT1.first != 1 || !LT1.second.isVector())

    return InstructionCost(1);


  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (TLI->isOperationExpand(ISD, LT1.second))

    return InstructionCost(1);


  if (Ty2) {

    std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);

    if (LT2.first != 1 || !LT2.second.isVector())

      return InstructionCost(1);

  }


  return InstructionCost(2);

}


InstructionCost PPCTTIImpl::getArithmeticInstrCost(

    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,

    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,

    ArrayRef<const Value *> Args, const Instruction *CxtI) const {

  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");


  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);

  if (!CostFactor.isValid())

    return InstructionCost::getMax();


  // TODO: Handle more cost kinds.

  if (CostKind != TTI::TCK_RecipThroughput)

    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,

                                         Op2Info, Args, CxtI);


  // Fallback to the default implementation.

  InstructionCost Cost = BaseT::getArithmeticInstrCost(

      Opcode, Ty, CostKind, Op1Info, Op2Info);

  return Cost * CostFactor;

}


InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,

                                           VectorType *DstTy, VectorType *SrcTy,

                                           ArrayRef<int> Mask,

                                           TTI::TargetCostKind CostKind,

                                           int Index, VectorType *SubTp,

                                           ArrayRef<const Value *> Args,

                                           const Instruction *CxtI) const {


  InstructionCost CostFactor =

      vectorCostAdjustmentFactor(Instruction::ShuffleVector, SrcTy, nullptr);

  if (!CostFactor.isValid())

    return InstructionCost::getMax();


  // Legalize the type.

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcTy);


  // PPC, for both Altivec/VSX, support cheap arbitrary permutations

  // (at least in the sense that there need only be one non-loop-invariant

  // instruction). We need one such shuffle instruction for each actual

  // register (this is not true for arbitrary shuffles, but is true for the

  // structured types of shuffles covered by TTI::ShuffleKind).

  return LT.first * CostFactor;

}


InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,

                                           TTI::TargetCostKind CostKind,

                                           const Instruction *I) const {

  if (CostKind != TTI::TCK_RecipThroughput)

    return Opcode == Instruction::PHI ? 0 : 1;

  // Branches are assumed to be predicted.

  return 0;

}


InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,

                                             Type *Src,

                                             TTI::CastContextHint CCH,

                                             TTI::TargetCostKind CostKind,

                                             const Instruction *I) const {

  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");


  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);

  if (!CostFactor.isValid())

    return InstructionCost::getMax();


  InstructionCost Cost =

      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

  Cost *= CostFactor;

  // TODO: Allow non-throughput costs that aren't binary.

  if (CostKind != TTI::TCK_RecipThroughput)

    return Cost == 0 ? 0 : 1;

  return Cost;

}


InstructionCost PPCTTIImpl::getCmpSelInstrCost(

    unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,

    TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,

    TTI::OperandValueInfo Op2Info, const Instruction *I) const {

  InstructionCost CostFactor =

      vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);

  if (!CostFactor.isValid())

    return InstructionCost::getMax();


  InstructionCost Cost = BaseT::getCmpSelInstrCost(

      Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I);

  // TODO: Handle other cost kinds.

  if (CostKind != TTI::TCK_RecipThroughput)

    return Cost;

  return Cost * CostFactor;

}


InstructionCost PPCTTIImpl::getVectorInstrCost(

    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,

    const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const {

  assert(Val->isVectorTy() && "This must be a vector type");


  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  assert(ISD && "Invalid opcode");


  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);

  if (!CostFactor.isValid())

    return InstructionCost::getMax();


  InstructionCost Cost =

      BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1, VIC);

  Cost *= CostFactor;


  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {

    // Double-precision scalars are already located in index #0 (or #1 if LE).

    if (ISD == ISD::EXTRACT_VECTOR_ELT &&

        Index == (ST->isLittleEndian() ? 1 : 0))

      return 0;


    return Cost;

  }

  if (Val->getScalarType()->isIntegerTy()) {

    unsigned EltSize = Val->getScalarSizeInBits();

    // Computing on 1 bit values requires extra mask or compare operations.

    unsigned MaskCostForOneBitSize = (VecMaskCost && EltSize == 1) ? 1 : 0;

    // Computing on non const index requires extra mask or compare operations.

    unsigned MaskCostForIdx = (Index != -1U) ? 0 : 1;

    if (ST->hasP9Altivec()) {

      // P10 has vxform insert which can handle non const index. The

      // MaskCostForIdx is for masking the index.

      // P9 has insert for const index. A move-to VSR and a permute/insert.

      // Assume vector operation cost for both (cost will be 2x on P9).

      if (ISD == ISD::INSERT_VECTOR_ELT) {

        if (ST->hasP10Vector())

          return CostFactor + MaskCostForIdx;

        if (Index != -1U)

          return 2 * CostFactor;

      } else if (ISD == ISD::EXTRACT_VECTOR_ELT) {

        // It's an extract.  Maybe we can do a cheap move-from VSR.

        unsigned EltSize = Val->getScalarSizeInBits();

        // P9 has both mfvsrd and mfvsrld for 64 bit integer.

        if (EltSize == 64 && Index != -1U)

          return 1;

        if (EltSize == 32) {

          unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;

          if (Index == MfvsrwzIndex)

            return 1;


          // For other indexs like non const, P9 has vxform extract. The

          // MaskCostForIdx is for masking the index.

          return CostFactor + MaskCostForIdx;

        }


        // We need a vector extract (or mfvsrld).  Assume vector operation cost.

        // The cost of the load constant for a vector extract is disregarded

        // (invariant, easily schedulable).

        return CostFactor + MaskCostForOneBitSize + MaskCostForIdx;

      }

    } else if (ST->hasDirectMove() && Index != -1U) {

      // Assume permute has standard cost.

      // Assume move-to/move-from VSR have 2x standard cost.

      if (ISD == ISD::INSERT_VECTOR_ELT)

        return 3;

      return 3 + MaskCostForOneBitSize;

    }

  }


  // Estimated cost of a load-hit-store delay.  This was obtained

  // experimentally as a minimum needed to prevent unprofitable

  // vectorization for the paq8p benchmark.  It may need to be

  // raised further if other unprofitable cases remain.

  unsigned LHSPenalty = 2;

  if (ISD == ISD::INSERT_VECTOR_ELT)

    LHSPenalty += 7;


  // Vector element insert/extract with Altivec is very expensive,

  // because they require store and reload with the attendant

  // processor stall for load-hit-store.  Until VSX is available,

  // these need to be estimated as very costly.

  if (ISD == ISD::EXTRACT_VECTOR_ELT ||

      ISD == ISD::INSERT_VECTOR_ELT)

    return LHSPenalty + Cost;


  return Cost;

}


InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,

                                            Align Alignment,

                                            unsigned AddressSpace,

                                            TTI::TargetCostKind CostKind,

                                            TTI::OperandValueInfo OpInfo,

                                            const Instruction *I) const {


  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);

  if (!CostFactor.isValid())

    return InstructionCost::getMax();


  if (TLI->getValueType(DL, Src,  true) == MVT::Other)

    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,

                                  CostKind);

  // Legalize the type.

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&

         "Invalid Opcode");


  InstructionCost Cost =

      BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);

  // TODO: Handle other cost kinds.

  if (CostKind != TTI::TCK_RecipThroughput)

    return Cost;


  Cost *= CostFactor;


  bool IsAltivecType = ST->hasAltivec() &&

                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||

                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);

  bool IsVSXType = ST->hasVSX() &&

                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);


  // VSX has 32b/64b load instructions. Legalization can handle loading of

  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and

  // PPCTargetLowering can't compute the cost appropriately. So here we

  // explicitly check this case. There are also corresponding store

  // instructions.

  unsigned MemBits = Src->getPrimitiveSizeInBits();

  unsigned SrcBytes = LT.second.getStoreSize();

  if (ST->hasVSX() && IsAltivecType) {

    if (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32))

      return 1;


    // Use lfiwax/xxspltw

    if (Opcode == Instruction::Load && MemBits == 32 && Alignment < SrcBytes)

      return 2;

  }


  // Aligned loads and stores are easy.

  if (!SrcBytes || Alignment >= SrcBytes)

    return Cost;


  // If we can use the permutation-based load sequence, then this is also

  // relatively cheap (not counting loop-invariant instructions): one load plus

  // one permute (the last load in a series has extra cost, but we're

  // neglecting that here). Note that on the P7, we could do unaligned loads

  // for Altivec types using the VSX instructions, but that's more expensive

  // than using the permutation-based load sequence. On the P8, that's no

  // longer true.

  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&

      Alignment >= LT.second.getScalarType().getStoreSize())

    return Cost + LT.first; // Add the cost of the permutations.


  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the

  // P7, unaligned vector loads are more expensive than the permutation-based

  // load sequence, so that might be used instead, but regardless, the net cost

  // is about the same (not counting loop-invariant instructions).

  if (IsVSXType || (ST->hasVSX() && IsAltivecType))

    return Cost;


  // Newer PPC supports unaligned memory access.

  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))

    return Cost;


  // PPC in general does not support unaligned loads and stores. They'll need

  // to be decomposed based on the alignment factor.


  // Add the cost of each scalar load or store.

  Cost += LT.first * ((SrcBytes / Alignment.value()) - 1);


  // For a vector type, there is also scalarization overhead (only for

  // stores, loads are expanded using the vector-load + permutation sequence,

  // which is much less expensive).

  if (Src->isVectorTy() && Opcode == Instruction::Store)

    for (int I = 0, E = cast<FixedVectorType>(Src)->getNumElements(); I < E;

         ++I)

      Cost +=

          getVectorInstrCost(Instruction::ExtractElement, Src, CostKind, I,

                             nullptr, nullptr, TTI::VectorInstrContext::None);


  return Cost;

}


InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(

    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,

    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,

    bool UseMaskForCond, bool UseMaskForGaps) const {

  InstructionCost CostFactor =

      vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);

  if (!CostFactor.isValid())

    return InstructionCost::getMax();


  if (UseMaskForCond || UseMaskForGaps)

    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,

                                             Alignment, AddressSpace, CostKind,

                                             UseMaskForCond, UseMaskForGaps);


  assert(isa<VectorType>(VecTy) &&

         "Expect a vector type for interleaved memory op");


  // Legalize the type.

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);


  // Firstly, the cost of load/store operation.

  InstructionCost Cost =

      getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);


  // PPC, for both Altivec/VSX, support cheap arbitrary permutations

  // (at least in the sense that there need only be one non-loop-invariant

  // instruction). For each result vector, we need one shuffle per incoming

  // vector (except that the first shuffle can take two incoming vectors

  // because it does not need to take itself).

  Cost += Factor*(LT.first-1);


  return Cost;

}


InstructionCost


PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

                                  TTI::TargetCostKind CostKind) const {


  if (!VPIntrinsic::isVPIntrinsic(ICA.getID()))

    return BaseT::getIntrinsicInstrCost(ICA, CostKind);


  if (ICA.getID() == Intrinsic::vp_load) {

    MemIntrinsicCostAttributes MICA(Intrinsic::masked_load, ICA.getReturnType(),

                                    Align(1), 0);

    return getMemIntrinsicInstrCost(MICA, CostKind);

  }


  if (ICA.getID() == Intrinsic::vp_store) {

    MemIntrinsicCostAttributes MICA(Intrinsic::masked_store,

                                    ICA.getArgTypes()[0], Align(1), 0);

    return getMemIntrinsicInstrCost(MICA, CostKind);

  }


  return InstructionCost::getInvalid();

}


bool PPCTTIImpl::areInlineCompatible(const Function *Caller,

                                     const Function *Callee) const {

  const TargetMachine &TM = getTLI()->getTargetMachine();


  const FeatureBitset &CallerBits =

      TM.getSubtargetImpl(*Caller)->getFeatureBits();

  const FeatureBitset &CalleeBits =

      TM.getSubtargetImpl(*Callee)->getFeatureBits();


  // Check that targets features are exactly the same. We can revisit to see if

  // we can improve this.

  return CallerBits == CalleeBits;

}


bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,

                                       const Function *Callee,

                                       ArrayRef<Type *> Types) const {


  // We need to ensure that argument promotion does not

  // attempt to promote pointers to MMA types (__vector_pair

  // and __vector_quad) since these types explicitly cannot be

  // passed as arguments. Both of these types are larger than

  // the 128-bit Altivec vectors and have a scalar size of 1 bit.

  if (!BaseT::areTypesABICompatible(Caller, Callee, Types))

    return false;


  return llvm::none_of(Types, [](Type *Ty) {

    if (Ty->isSized())

      return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;

    return false;

  });

}


bool PPCTTIImpl::canSaveCmp(Loop *L, CondBrInst **BI, ScalarEvolution *SE,

                            LoopInfo *LI, DominatorTree *DT,

                            AssumptionCache *AC,

                            TargetLibraryInfo *LibInfo) const {

  // Process nested loops first.

  for (Loop *I : *L)

    if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))

      return false; // Stop search.


  HardwareLoopInfo HWLoopInfo(L);


  if (!HWLoopInfo.canAnalyze(*LI))

    return false;


  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))

    return false;


  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))

    return false;


  *BI = HWLoopInfo.ExitBranch;

  return true;

}


bool PPCTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,

                               const TargetTransformInfo::LSRCost &C2) const {

  // PowerPC default behaviour here is "instruction number 1st priority".

  // If LsrNoInsnsCost is set, call default implementation.

  if (!LsrNoInsnsCost)

    return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,

                    C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <

           std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,

                    C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);

  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);

}


bool PPCTTIImpl::isNumRegsMajorCostOfLSR() const { return false; }


bool PPCTTIImpl::shouldBuildRelLookupTables() const {

  const PPCTargetMachine &TM = ST->getTargetMachine();

  // XCOFF hasn't implemented lowerRelativeReference, disable non-ELF for now.

  if (!TM.isELFv2ABI())

    return false;

  return BaseT::shouldBuildRelLookupTables();

}


bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,

                                    MemIntrinsicInfo &Info) const {

  switch (Inst->getIntrinsicID()) {

  case Intrinsic::ppc_altivec_lvx:

  case Intrinsic::ppc_altivec_lvxl:

  case Intrinsic::ppc_altivec_lvebx:

  case Intrinsic::ppc_altivec_lvehx:

  case Intrinsic::ppc_altivec_lvewx:

  case Intrinsic::ppc_vsx_lxvd2x:

  case Intrinsic::ppc_vsx_lxvw4x:

  case Intrinsic::ppc_vsx_lxvd2x_be:

  case Intrinsic::ppc_vsx_lxvw4x_be:

  case Intrinsic::ppc_vsx_lxvl:

  case Intrinsic::ppc_vsx_lxvll:

  case Intrinsic::ppc_vsx_lxvp: {

    Info.PtrVal = Inst->getArgOperand(0);

    Info.ReadMem = true;

    Info.WriteMem = false;

    return true;

  }

  case Intrinsic::ppc_altivec_stvx:

  case Intrinsic::ppc_altivec_stvxl:

  case Intrinsic::ppc_altivec_stvebx:

  case Intrinsic::ppc_altivec_stvehx:

  case Intrinsic::ppc_altivec_stvewx:

  case Intrinsic::ppc_vsx_stxvd2x:

  case Intrinsic::ppc_vsx_stxvw4x:

  case Intrinsic::ppc_vsx_stxvd2x_be:

  case Intrinsic::ppc_vsx_stxvw4x_be:

  case Intrinsic::ppc_vsx_stxvl:

  case Intrinsic::ppc_vsx_stxvll:

  case Intrinsic::ppc_vsx_stxvp: {

    Info.PtrVal = Inst->getArgOperand(1);

    Info.ReadMem = false;

    Info.WriteMem = true;

    return true;

  }

  case Intrinsic::ppc_stbcx:

  case Intrinsic::ppc_sthcx:

  case Intrinsic::ppc_stdcx:

  case Intrinsic::ppc_stwcx: {

    Info.PtrVal = Inst->getArgOperand(0);

    Info.ReadMem = false;

    Info.WriteMem = true;

    return true;

  }

  default:

    break;

  }


  return false;

}


bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {

  return TLI->supportsTailCallFor(CB);

}


// Target hook used by CodeGen to decide whether to expand vector predication

// intrinsics into scalar operations or to use special ISD nodes to represent

// them. The Target will not see the intrinsics.

TargetTransformInfo::VPLegalization


PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {

  using VPLegalization = TargetTransformInfo::VPLegalization;

  unsigned Directive = ST->getCPUDirective();

  VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI);

  if (Directive != PPC::DIR_PWR10 && Directive != PPC::DIR_PWR_FUTURE &&

      (!Pwr9EVL || Directive != PPC::DIR_PWR9))

    return DefaultLegalization;


  if (!ST->isPPC64())

    return DefaultLegalization;


  unsigned IID = PI.getIntrinsicID();

  if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store)

    return DefaultLegalization;


  bool IsLoad = IID == Intrinsic::vp_load;

  Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType();

  EVT VT = TLI->getValueType(DL, VecTy, true);

  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&

      VT != MVT::v16i8)

    return DefaultLegalization;


  auto IsAllTrueMask = [](Value *MaskVal) {

    if (Value *SplattedVal = getSplatValue(MaskVal))

      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))

        return ConstValue->isAllOnesValue();

    return false;

  };

  unsigned MaskIx = IsLoad ? 1 : 2;

  if (!IsAllTrueMask(PI.getOperand(MaskIx)))

    return DefaultLegalization;


  return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);

}


bool PPCTTIImpl::hasActiveVectorLength() const {

  if (!PPCEVL || !ST->isPPC64())

    return false;

  unsigned CPU = ST->getCPUDirective();

  return CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||

         (Pwr9EVL && CPU == PPC::DIR_PWR9);

}


bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,

                                   unsigned AddressSpace,

                                   TTI::MaskKind MaskKind) const {

  if (!hasActiveVectorLength())

    return false;


  auto IsLegalLoadWithLengthType = [](EVT VT) {

    if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)

      return false;

    return true;

  };


  return IsLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true));

}


bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,

                                    unsigned AddressSpace,

                                    TTI::MaskKind MaskKind) const {

  return isLegalMaskedLoad(DataType, Alignment, AddressSpace);

}


InstructionCost


PPCTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,

                                     TTI::TargetCostKind CostKind) const {


  InstructionCost BaseCost = BaseT::getMemIntrinsicInstrCost(MICA, CostKind);


  unsigned Opcode;

  switch (MICA.getID()) {

  case Intrinsic::masked_load:

    Opcode = Instruction::Load;

    break;

  case Intrinsic::masked_store:

    Opcode = Instruction::Store;

    break;

  default:

    return BaseCost;

  }


  Type *DataTy = MICA.getDataType();

  Align Alignment = MICA.getAlignment();

  unsigned AddressSpace = MICA.getAddressSpace();


  auto VecTy = dyn_cast<FixedVectorType>(DataTy);

  if (!VecTy)

    return BaseCost;

  if (Opcode == Instruction::Load) {

    if (!isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace))

      return BaseCost;

  } else {

    if (!isLegalMaskedStore(VecTy->getScalarType(), Alignment, AddressSpace))

      return BaseCost;

  }

  if (VecTy->getPrimitiveSizeInBits() > 128)

    return BaseCost;


  // Cost is 1 (scalar compare) + 1 (scalar select) +

  //  1 * vectorCostAdjustmentFactor (vector load with length)

  // Maybe + 1 (scalar shift)

  InstructionCost Cost =

      1 + 1 + vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);

  if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||

      VecTy->getScalarSizeInBits() != 8)

    Cost += 1; // need shift for length

  return Cost;

}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

BasicTTIImpl.h
This file provides a helper that implements much of the TTI interface in terms of the target-independ...

CodeMetrics.h

CommandLine.h

CostKind
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

IntrinsicCostStrategy::InstructionCost
@ InstructionCost
Definition CostModel.cpp:51

VPLegalization
TargetTransformInfo::VPLegalization VPLegalization
Definition ExpandVectorPredication.cpp:33

InstCombiner.h
This file provides the interface for the instcombine pass implementation.

Options
static LVOptions Options
Definition LVOptions.cpp:25

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

Metrics
Machine Trace Metrics
Definition MachineTraceMetrics.cpp:62

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

PPCEVL
static cl::opt< bool > PPCEVL("ppc-evl", cl::desc("Allow EVL type vp.load/vp.store"), cl::init(false), cl::Hidden)

VecMaskCost
static cl::opt< bool > VecMaskCost("ppc-vec-mask-cost", cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden)

Pwr9EVL
static cl::opt< bool > Pwr9EVL("ppc-pwr9-evl", cl::desc("Allow vp.load and vp.store for pwr9"), cl::init(false), cl::Hidden)

DisablePPCConstHoist
static cl::opt< bool > DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden)

SmallCTRLoopThreshold
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))

isMMAType
static bool isMMAType(Type *Ty)
Definition PPCTargetTransformInfo.cpp:325

EnablePPCColdCC
static cl::opt< bool > EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions"))

LsrNoInsnsCost
static cl::opt< bool > LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), cl::desc("Do not add instruction count to lsr cost model"))

PPCTargetTransformInfo.h
This file a TargetTransformInfoImplBase conforming object specific to the PPC target machine.

ProfDataUtils.h
This file contains the declarations for profiling metadata utility functions.

getNumElements
static unsigned getNumElements(Type *Ty)
Definition SLPVectorizer.cpp:330

TargetLibraryInfo.h

TargetLowering.h
This file describes how to lower LLVM code to machine code.

TargetSchedule.h

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

Local.h

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition AssumptionCache.h:44

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicTTIImplBase< PPCTTIImpl >::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
Definition BasicTTIImpl.h:1594

llvm::BasicTTIImplBase< PPCTTIImpl >::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition BasicTTIImpl.h:1048

llvm::BasicTTIImplBase< PPCTTIImpl >::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const override
Definition BasicTTIImpl.h:640

llvm::BasicTTIImplBase< PPCTTIImpl >::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1408

llvm::BasicTTIImplBase< PPCTTIImpl >::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition BasicTTIImpl.h:714

llvm::BasicTTIImplBase< PPCTTIImpl >::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition BasicTTIImpl.h:786

llvm::BasicTTIImplBase< PPCTTIImpl >::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1211

llvm::BasicTTIImplBase< PPCTTIImpl >::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Definition BasicTTIImpl.h:1012

llvm::BasicTTIImplBase< PPCTTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition BasicTTIImpl.h:1461

llvm::BasicTTIImplBase< PPCTTIImpl >::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Definition BasicTTIImpl.h:1742

llvm::BasicTTIImplBase< PPCTTIImpl >::DL
const DataLayout & DL

llvm::BasicTTIImplBase< PPCTTIImpl >::getMemIntrinsicInstrCost
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Definition BasicTTIImpl.h:3155

llvm::BasicTTIImplBase< PPCTTIImpl >::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1547

llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition InstrTypes.h:1118

llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition InstrTypes.h:1294

llvm::CastInst::Create
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition Instructions.cpp:3054

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676

llvm::CondBrInst
Conditional Branch instruction.
Definition Instructions.h:3221

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Directive
Definition DirectiveEmitter.h:231

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159

llvm::ElementCount
Definition TypeSize.h:298

llvm::FeatureBitset
Container class for subtarget features.
Definition SubtargetFeature.h:42

llvm::Function
Definition Function.h:65

llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2627

llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2615

llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:529

llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2242

llvm::InstCombiner
The core instruction combiner logic.
Definition InstCombiner.h:48

llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition InstCombiner.h:350

llvm::InstCombiner::getDominatorTree
DominatorTree & getDominatorTree() const
Definition InstCombiner.h:349

llvm::InstCombiner::Builder
BuilderTy & Builder
Definition InstCombiner.h:61

llvm::InstCombiner::getAssumptionCache
AssumptionCache & getAssumptionCache() const
Definition InstCombiner.h:347

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition InstructionCost.h:82

llvm::InstructionCost::getMax
static InstructionCost getMax()
Definition InstructionCost.h:80

llvm::InstructionCost::isValid
bool isValid() const
Definition InstructionCost.h:88

llvm::Instruction
Definition Instruction.h:69

llvm::IntrinsicCostAttributes
Definition TargetTransformInfo.h:181

llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition TargetTransformInfo.h:218

llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition TargetTransformInfo.h:214

llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition TargetTransformInfo.h:212

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition IntrinsicInst.h:49

llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition IntrinsicInst.h:56

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::LoadInst
An instruction for reading from memory.
Definition Instructions.h:181

llvm::LoopInfo
Definition LoopInfo.h:426

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition MCSubtargetInfo.h:115

llvm::MemIntrinsicCostAttributes
Information for memory intrinsic cost model.
Definition TargetTransformInfo.h:128

llvm::MemIntrinsicCostAttributes::getAlignment
Align getAlignment() const
Definition TargetTransformInfo.h:178

llvm::MemIntrinsicCostAttributes::getAddressSpace
unsigned getAddressSpace() const
Definition TargetTransformInfo.h:177

llvm::MemIntrinsicCostAttributes::getDataType
Type * getDataType() const
Definition TargetTransformInfo.h:175

llvm::MemIntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition TargetTransformInfo.h:172

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition OptimizationRemarkEmitter.h:33

llvm::PPCSubtarget::POPCNTD_Slow
@ POPCNTD_Slow
Definition PPCSubtarget.h:75

llvm::PPCSubtarget::POPCNTD_Unavailable
@ POPCNTD_Unavailable
Definition PPCSubtarget.h:74

llvm::PPCTTIImpl::isLSRCostLess
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
Definition PPCTargetTransformInfo.cpp:983

llvm::PPCTTIImpl::vectorCostAdjustmentFactor
InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1, Type *Ty2) const
Definition PPCTargetTransformInfo.cpp:564

llvm::PPCTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition PPCTargetTransformInfo.cpp:596

llvm::PPCTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition PPCTargetTransformInfo.cpp:776

llvm::PPCTTIImpl::isLegalMaskedLoad
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind=TTI::MaskKind::VariableOrConstantMask) const override
Definition PPCTargetTransformInfo.cpp:1109

llvm::PPCTTIImpl::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
Definition PPCTargetTransformInfo.cpp:870

llvm::PPCTTIImpl::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization() const override
Definition PPCTargetTransformInfo.cpp:458

llvm::PPCTTIImpl::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const override
Definition PPCTargetTransformInfo.cpp:471

llvm::PPCTTIImpl::enableMemCmpExpansion
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
Definition PPCTargetTransformInfo.cpp:447

llvm::PPCTTIImpl::getCacheLineSize
unsigned getCacheLineSize() const override
Definition PPCTargetTransformInfo.cpp:512

llvm::PPCTTIImpl::hasActiveVectorLength
bool hasActiveVectorLength() const override
Definition PPCTargetTransformInfo.cpp:1101

llvm::PPCTTIImpl::VRRC
@ VRRC
Definition PPCTargetTransformInfo.h:94

llvm::PPCTTIImpl::VSXRC
@ VSXRC
Definition PPCTargetTransformInfo.h:94

llvm::PPCTTIImpl::GPRRC
@ GPRRC
Definition PPCTargetTransformInfo.h:94

llvm::PPCTTIImpl::FPRRC
@ FPRRC
Definition PPCTargetTransformInfo.h:94

llvm::PPCTTIImpl::getMemIntrinsicInstrCost
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Get memory intrinsic cost based on arguments.
Definition PPCTargetTransformInfo.cpp:1131

llvm::PPCTTIImpl::useColdCCForColdCall
bool useColdCCForColdCall(Function &F) const override
Definition PPCTargetTransformInfo.cpp:434

llvm::PPCTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition PPCTargetTransformInfo.cpp:410

llvm::PPCTTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
Definition PPCTargetTransformInfo.cpp:1005

llvm::PPCTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
Definition PPCTargetTransformInfo.cpp:499

llvm::PPCTTIImpl::isLegalMaskedStore
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind=TTI::MaskKind::VariableOrConstantMask) const override
Definition PPCTargetTransformInfo.cpp:1124

llvm::PPCTTIImpl::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const override
Definition PPCTargetTransformInfo.cpp:995

llvm::PPCTTIImpl::getPrefetchDistance
unsigned getPrefetchDistance() const override
Definition PPCTargetTransformInfo.cpp:525

llvm::PPCTTIImpl::getVPLegalizationStrategy
TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const override
Definition PPCTargetTransformInfo.cpp:1066

llvm::PPCTTIImpl::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
Definition PPCTargetTransformInfo.cpp:926

llvm::PPCTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition PPCTargetTransformInfo.cpp:687

llvm::PPCTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const override
Definition PPCTargetTransformInfo.cpp:460

llvm::PPCTTIImpl::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const override
Definition PPCTargetTransformInfo.cpp:347

llvm::PPCTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition PPCTargetTransformInfo.cpp:426

llvm::PPCTTIImpl::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
Definition PPCTargetTransformInfo.cpp:237

llvm::PPCTTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition PPCTargetTransformInfo.cpp:641

llvm::PPCTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
Definition PPCTargetTransformInfo.cpp:905

llvm::PPCTTIImpl::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const override
Definition PPCTargetTransformInfo.cpp:485

llvm::PPCTTIImpl::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
Definition PPCTargetTransformInfo.cpp:331

llvm::PPCTTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(ElementCount VF) const override
Definition PPCTargetTransformInfo.cpp:529

llvm::PPCTTIImpl::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const override
Definition PPCTargetTransformInfo.cpp:997

llvm::PPCTTIImpl::supportsTailCallFor
bool supportsTailCallFor(const CallBase *CB) const override
Definition PPCTargetTransformInfo.cpp:1058

llvm::PPCTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition PPCTargetTransformInfo.cpp:617

llvm::PPCTTIImpl::canSaveCmp
bool canSaveCmp(Loop *L, CondBrInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const override
Definition PPCTargetTransformInfo.cpp:959

llvm::PPCTTIImpl::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
Definition PPCTargetTransformInfo.cpp:202

llvm::PPCTTIImpl::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
Definition PPCTargetTransformInfo.cpp:940

llvm::PPCTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
Definition PPCTargetTransformInfo.cpp:64

llvm::PPCTTIImpl::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition PPCTargetTransformInfo.cpp:670

llvm::PPCTTIImpl::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions) const override
Definition PPCTargetTransformInfo.cpp:438

llvm::PPCTTIImpl::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
Definition PPCTargetTransformInfo.cpp:171

llvm::PPCTTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition PPCTargetTransformInfo.cpp:650

llvm::PPCTTIImpl::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition PPCTargetTransformInfo.cpp:73

llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition PPCTargetMachine.h:26

llvm::PPCTargetMachine::isPPC64
bool isPPC64() const
Definition PPCTargetMachine.h:74

llvm::PPCTargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) const override
Get a TargetTransformInfo implementation for the target.
Definition PPCTargetMachine.cpp:560

llvm::PPCTargetMachine::isELFv2ABI
bool isELFv2ABI() const
Definition PPCTargetMachine.h:71

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition Constants.cpp:2088

llvm::ScalarEvolution
The main scalar evolution driver.
Definition ScalarEvolution.h:625

llvm::ScalarEvolution::getSmallConstantTripCount
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition ScalarEvolution.cpp:8503

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition SmallPtrSet.h:527

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::StoreInst
An instruction for storing to memory.
Definition Instructions.h:297

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:266

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition TargetMachine.h:83

llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition TargetMachine.h:139

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition TargetSchedule.h:31

llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition TargetSchedule.h:111

llvm::TargetSchedModel::init
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
Definition TargetSchedule.cpp:43

llvm::TargetTransformInfoImplBase::getVPLegalizationStrategy
virtual TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
Definition TargetTransformInfoImpl.h:1195

llvm::TargetTransformInfoImplBase::getIntImmCost
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition TargetTransformInfoImpl.h:587

llvm::TargetTransformInfoImplBase::getIntImmCostInst
virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
Definition TargetTransformInfoImpl.h:592

llvm::TargetTransformInfoImplBase::getInstructionCost
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const
Definition TargetTransformInfoImpl.h:86

llvm::TargetTransformInfoImplBase::isLSRCostLess
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
Definition TargetTransformInfoImpl.h:328

llvm::TargetTransformInfoImplBase::getIntImmCostIntrin
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition TargetTransformInfoImpl.h:600

llvm::TargetTransformInfoImplBase::areTypesABICompatible
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const
Definition TargetTransformInfoImpl.h:1091

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:271

llvm::TargetTransformInfo::VectorInstrContext
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
Definition TargetTransformInfo.h:1067

llvm::TargetTransformInfo::VectorInstrContext::None
@ None
The insert/extract is not used with a load/store.
Definition TargetTransformInfo.h:1068

llvm::TargetTransformInfo::MaskKind
MaskKind
Some targets only support masked load/store with a constant mask.
Definition TargetTransformInfo.h:901

llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition TargetTransformInfo.h:334

llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition TargetTransformInfo.h:335

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition TargetTransformInfo.h:1350

llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition TargetTransformInfo.h:1350

llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition TargetTransformInfo.h:1350

llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition TargetTransformInfo.h:1350

llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition TargetTransformInfo.h:825

llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition TargetTransformInfo.h:825

llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition TargetTransformInfo.h:825

llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition TargetTransformInfo.h:825

llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition TargetTransformInfo.h:360

llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition TargetTransformInfo.h:361

llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition TargetTransformInfo.h:1246

llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition TargetTransformInfo.h:1581

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82

llvm::TypeSize
Definition TypeSize.h:332

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343

llvm::TypeSize::getScalable
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Type::getInt64Ty
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290

llvm::Type::getInt32Ty
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313

llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236

llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257

llvm::User
Definition User.h:44

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:207

llvm::VPIntrinsic
This is the common base class for vector predication intrinsics.
Definition IntrinsicInst.h:572

llvm::VPIntrinsic::isVPIntrinsic
static LLVM_ABI bool isVPIntrinsic(Intrinsic::ID)
Definition IntrinsicInst.cpp:490

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:482

llvm::cl::opt
Definition CommandLine.h:1454

uint64_t

Call
CallInst * Call
Definition ObjCARCOpts.cpp:2356

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::ISD
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
Definition ISDOpcodes.h:24

llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576

llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::NVPTXAS::AddressSpace
AddressSpace
Definition NVPTXAddrSpace.h:21

llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition PPCSubtarget.h:51

llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition PPCSubtarget.h:61

llvm::PPC::DIR_PWR7
@ DIR_PWR7
Definition PPCSubtarget.h:59

llvm::PPC::DIR_PWR10
@ DIR_PWR10
Definition PPCSubtarget.h:62

llvm::PPC::DIR_440
@ DIR_440
Definition PPCSubtarget.h:42

llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition PPCSubtarget.h:60

llvm::PPC::DIR_A2
@ DIR_A2
Definition PPCSubtarget.h:49

llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition PPCSubtarget.h:64

llvm::PPC::DIR_E5500
@ DIR_E5500
Definition PPCSubtarget.h:52

llvm::PPC::DIR_PWR11
@ DIR_PWR11
Definition PPCSubtarget.h:63

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::Cost
InstructionCost Cost
Definition FunctionSpecialization.h:103

llvm::isInt
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::getSplatValue
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition VectorUtils.cpp:383

llvm::isShiftedMask_32
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267

llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::getOrEnforceKnownAlignment
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition Local.cpp:1581

llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752

llvm::isUInt
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::extractBranchWeights
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition ProfDataUtils.cpp:204

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::VFParamKind::Vector
@ Vector
Definition VFABIDemangler.h:27

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::Align::value
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77

llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition CodeMetrics.h:34

llvm::CodeMetrics::collectEphemeralValues
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition CodeMetrics.cpp:71

llvm::EVT
Extended Value Type.
Definition ValueTypes.h:35

llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition TargetTransformInfo.h:103

llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition TargetTransformInfo.h:111

llvm::HardwareLoopInfo::canAnalyze
LLVM_ABI bool canAnalyze(LoopInfo &LI)
Definition TargetTransformInfo.cpp:65

llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition TargetTransformInfo.h:110

llvm::HardwareLoopInfo::ExitBranch
CondBrInst * ExitBranch
Definition TargetTransformInfo.h:108

llvm::HardwareLoopInfo::isHardwareLoopCandidate
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition TargetTransformInfo.cpp:128

llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition TargetTransformInfo.h:76

llvm::TargetTransformInfo::LSRCost
Definition TargetTransformInfo.h:627

llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition TargetTransformInfo.h:633

llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition TargetTransformInfo.h:637

llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition TargetTransformInfo.h:630

llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition TargetTransformInfo.h:635

llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition TargetTransformInfo.h:632

llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition TargetTransformInfo.h:631

llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition TargetTransformInfo.h:634

llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition TargetTransformInfo.h:636

llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is.
Definition TargetTransformInfo.h:1108

llvm::TargetTransformInfo::OperandValueInfo
Definition TargetTransformInfo.h:1283

llvm::TargetTransformInfo::PeelingPreferences
Definition TargetTransformInfo.h:765

llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition TargetTransformInfo.h:641

llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition TargetTransformInfo.h:702

llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition TargetTransformInfo.h:698

llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition TargetTransformInfo.h:707

llvm::TargetTransformInfo::VPLegalization
Definition TargetTransformInfo.h:2015

llvm::TargetTransformInfo::VPLegalization::Legal
@ Legal
Definition TargetTransformInfo.h:2018

llvm::cl::desc
Definition CommandLine.h:410