doxygen/WebAssemblyTargetTransformInfo_8cpp_source.html

//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// This file defines the WebAssembly-specific TargetTransformInfo

/// implementation.

///

//===----------------------------------------------------------------------===//


#include "WebAssemblyTargetTransformInfo.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/IR/IntrinsicsWebAssembly.h"

#include "llvm/Transforms/InstCombine/InstCombiner.h"


#include "llvm/CodeGen/CostTable.h"

using namespace llvm;


#define DEBUG_TYPE "wasmtti"


/// Report the level of population-count support for integers that are
/// \p TyWidth bits wide. \p TyWidth must be a power of two; every such width
/// is reported as having fast hardware support.
TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");

  const TargetTransformInfo::PopcntSupportKind Support =
      TargetTransformInfo::PSK_FastHardware;
  return Support;
}


/// Return the number of registers reported for register class \p ClassID.
///
/// The base implementation's answer is used as-is, except for the vector
/// class (ClassID 1), which is raised to at least 16 as a rough guess for
/// SIMD.
unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  const unsigned BaseCount = BaseT::getNumberOfRegisters(ClassID);

  // Only the vector register class gets the 16-register floor.
  const bool IsVectorClass = ClassID == 1;
  if (!IsVectorClass)
    return BaseCount;

  return std::max(BaseCount, 16u);
}


/// Return the register width, in bits, for register kind \p K.
///
/// Scalars are 64 bits wide. Fixed-width vectors are 128 bits when the
/// subtarget has SIMD128 and 64 bits otherwise. Scalable vectors are reported
/// with a scalable size of 0.
TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  if (K == TargetTransformInfo::RGK_Scalar)
    return TypeSize::getFixed(64);

  if (K == TargetTransformInfo::RGK_FixedWidthVector) {
    const bool HasSIMD = getST()->hasSIMD128();
    return TypeSize::getFixed(HasSIMD ? 128 : 64);
  }

  if (K == TargetTransformInfo::RGK_ScalableVector)
    return TypeSize::getScalable(0);

  llvm_unreachable("Unsupported register kind");
}


/// Estimate the cost of the arithmetic instruction \p Opcode on type \p Ty.
///
/// With SIMD128, a small table of vector multiplies is consulted first.
/// Otherwise the generic cost is used, except for vector shifts whose shift
/// amount is not uniform, which are costed as a per-element
/// extract/op/insert sequence.
InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) const {

  if (ST->hasSIMD128()) {
    static const CostTblEntry ArithCostTbl[]{
        // extmul + (maybe awkward) shuffle
        {ISD::MUL, MVT::v8i8, 4},
        // 2x extmul + (okay) shuffle
        {ISD::MUL, MVT::v16i8, 4},
        // extmul
        {ISD::MUL, MVT::v4i16, 1},
        // extmul
        {ISD::MUL, MVT::v2i32, 1},
    };

    EVT ValueVT = TLI->getValueType(DL, Ty);
    if (ValueVT.isSimple()) {
      const int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
      const auto *Hit =
          CostTableLookup(ArithCostTbl, ISDOpcode, ValueVT.getSimpleVT());
      if (Hit)
        return Hit->Cost;
    }
  }

  const InstructionCost GenericCost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Op1Info, Op2Info);

  // Only vector shifts get special treatment below.
  auto *VecTy = dyn_cast<VectorType>(Ty);
  if (!VecTy)
    return GenericCost;

  const bool IsShift = Opcode == Instruction::LShr ||
                       Opcode == Instruction::AShr ||
                       Opcode == Instruction::Shl;
  if (!IsShift || Op2Info.isUniform())
    return GenericCost;

  // SIMD128's shifts currently only accept a scalar shift count. For each
  // element we need an extract, the scalar op, and an insert. This is a rough
  // approximation.
  const InstructionCost PerElement =
      TargetTransformInfo::TCC_Basic +
      getArithmeticInstrCost(Opcode, VecTy->getElementType(), CostKind) +
      TargetTransformInfo::TCC_Basic;
  return cast<FixedVectorType>(VecTy)->getNumElements() * PerElement;
}


/// Estimate the cost of the cast \p Opcode from \p Src to \p Dst.
///
/// The base implementation is used when either type does not map to a simple
/// value type or when the subtarget lacks SIMD128. Otherwise:
///  - an extension whose single user is a multiply is treated as free (or
///    cost 1 for the wider two-step cases), modelling extmul_low;
///  - a conversion table covers the remaining vector extends, truncates and
///    int<->fp conversions;
///  - anything not in the table falls back to the base implementation.
///
/// \param I Optional instruction being costed; only used to inspect its
///          single user for the extmul_low case.
InstructionCost WebAssemblyTTIImpl::getCastInstrCost(
    unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH,
    TTI::TargetCostKind CostKind, const Instruction *I) const {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  auto SrcTy = TLI->getValueType(DL, Src);
  auto DstTy = TLI->getValueType(DL, Dst);

  // The tables below are keyed on simple value types only.
  if (!SrcTy.isSimple() || !DstTy.isSimple()) {
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }

  if (!ST->hasSIMD128()) {
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }

  auto DstVT = DstTy.getSimpleVT();
  auto SrcVT = SrcTy.getSimpleVT();

  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    int UserISD = TLI->InstructionOpcodeToISD(SingleUser->getOpcode());

    // extmul_low support
    if (UserISD == ISD::MUL &&
        (ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND)) {
      // Free low extensions.
      if ((SrcVT == MVT::v8i8 && DstVT == MVT::v8i16) ||
          (SrcVT == MVT::v4i16 && DstVT == MVT::v4i32) ||
          (SrcVT == MVT::v2i32 && DstVT == MVT::v2i64)) {
        return 0;
      }
      // Will require an additional extlow operation for the intermediate
      // i16/i32 value.
      if ((SrcVT == MVT::v4i8 && DstVT == MVT::v4i32) ||
          (SrcVT == MVT::v2i16 && DstVT == MVT::v2i64)) {
        return 1;
      }
    }
  }

  static constexpr TypeConversionCostTblEntry ConversionTbl[] = {
      // extend_low
      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1},
      {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1},
      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1},
      {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1},
      // 2 x extend_low
      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2},
      {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2},
      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2},
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2},
      // extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2},
      {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2},
      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2},
      {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2},
      {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2},
      {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2},
      // 2x extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 4},
      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 4},
      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4},
      {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4},
      // 6x extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6},
      {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6},
      // shuffle
      {ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 2},
      {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 4},
      {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 2},
      {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 4},
      // narrow, and
      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2},
      {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2},
      // narrow, 2x and
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3},
      // 3x narrow, 4x and
      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 7},
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7},
      // 7x narrow, 8x and
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 15},
      // convert_i32x4
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
      // extend_low, convert
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
      // extend_low x 2, convert
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
      // several shuffles
      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
      // The unsigned v8i16 source mirrors the signed entry just above; this
      // row used to repeat the v8i8 entry, leaving v8i16 -> v8f32 uncovered.
      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
      /// trunc_sat, const, and, 3x narrow
      {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 6},
      {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 6},
      {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 6},
      {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 6},
      /// trunc_sat, const, and, narrow
      {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 4},
      {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 4},
      {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4},
      {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4},
      // 2x trunc_sat, const, 2x and, 3x narrow
      {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 8},
      {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 8},
      // 2x trunc_sat, const, 2x and, narrow
      {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 6},
      {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 6},
  };

  if (const auto *Entry =
          ConvertCostTableLookup(ConversionTbl, ISD, DstVT, SrcVT)) {
    return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}


/// Build the options that control inline expansion of memcmp.
///
/// Overlapping loads are allowed. Load sizes are listed largest first:
/// 16 bytes (only with SIMD128), then 8, 4, 2 and 1. The load budget comes
/// from the target lowering and is also used as the per-block limit.
/// \p IsZeroCmp is not consulted.
WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions
WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Opts;
  Opts.AllowOverlappingLoads = true;

  // Widest load first; the 16-byte load needs SIMD128.
  if (ST->hasSIMD128())
    Opts.LoadSizes.push_back(16);
  for (unsigned ScalarSize : {8u, 4u, 2u, 1u})
    Opts.LoadSizes.push_back(ScalarSize);

  Opts.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Opts.NumLoadsPerBlock = Opts.MaxNumLoads;
  return Opts;
}


/// Estimate the cost of a load or store (\p Opcode) of type \p Ty.
///
/// The base implementation is used without SIMD128, for non-fixed-vector
/// types, for load latency queries, and for types that map to MVT::Other.
/// For fixed vectors, loads of 32/64/128 bits and stores of
/// 8/16/32/64/128 bits cost 2; other widths fall back to the base
/// implementation. An invalid type-legalization cost yields an invalid
/// result.
InstructionCost WebAssemblyTTIImpl::getMemoryOpCost(
    unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
    TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
    const Instruction *I) const {
  // Every fallback path asks the base implementation the same question.
  auto GenericCost = [&]() -> InstructionCost {
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);
  };

  // FIXME: Load latency isn't handled here
  const bool IsLoadLatencyQuery =
      Opcode == Instruction::Load && CostKind == TTI::TCK_Latency;
  if (!ST->hasSIMD128() || !isa<FixedVectorType>(Ty) || IsLoadLatencyQuery)
    return GenericCost();

  EVT ValueVT = TLI->getValueType(DL, Ty, true);
  // Type legalization can't handle structs
  if (ValueVT == MVT::Other)
    return GenericCost();

  const auto Legalized = getTypeLegalizationCost(Ty);
  if (!Legalized.first.isValid())
    return InstructionCost::getInvalid();

  const int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  const unsigned Width = ValueVT.getSizeInBits();

  if (ISDOpcode == ISD::LOAD) {
    // 128-bit loads are a single instruction. 32-bit and 64-bit vector loads
    // can be lowered to load32_zero and load64_zero respectively. Assume SIMD
    // loads are twice as expensive as scalar.
    if (Width == 32 || Width == 64 || Width == 128)
      return 2;
  } else if (ISDOpcode == ISD::STORE) {
    // For stores, we can use store lane operations.
    if (Width == 8 || Width == 16 || Width == 32 || Width == 64 ||
        Width == 128)
      return 2;
  }

  return GenericCost();
}


/// Estimate the cost of a vector shuffle.
///
/// The shuffle kind is first refined from \p Mask. A broadcast of a fixed
/// vector on a SIMD128 subtarget costs 1; everything else is delegated to the
/// base implementation using the refined kind.
InstructionCost WebAssemblyTTIImpl::getShuffleCost(
    TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
    ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
    VectorType *SubTp, ArrayRef<const Value *> Args,
    const Instruction *CxtI) const {
  // Canonicalize the ShuffleKind in case optimizations didn't; otherwise we
  // might match against the wrong ShuffleKind.
  Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp);

  // Wasm SIMD128 has native splat instructions for all lane types.
  const bool IsNativeSplat = ST->hasSIMD128() &&
                             Kind == TTI::SK_Broadcast &&
                             isa<FixedVectorType>(SrcTy);
  if (!IsNativeSplat)
    return BaseT::getShuffleCost(Kind, DstTy, SrcTy, Mask, CostKind, Index,
                                 SubTp, Args, CxtI);

  return 1;
}


/// Estimate the cost of an interleaved load/store group.
///
/// \param Opcode  Memory opcode, forwarded to getMemoryOpCost.
/// \param Ty      The wide vector type covering all interleaved members.
/// \param Factor  Interleave factor; must be at least 2.
///
/// Returns an invalid cost without SIMD128, for non-fixed vectors, for
/// element counts that are below 2 or not divisible by \p Factor, for element
/// sizes other than 8/16/32/64 bits, and for factors other than 2 or 4 (when
/// the factor is at most 4). Masked accesses and factors above 4 are delegated
/// to the base implementation. Otherwise the cost is a table-driven shuffle
/// cost plus the memory cost of the sub-vector accesses; sub-vector types that
/// have no table entry fall back to the base implementation.
InstructionCost WebAssemblyTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  auto *VecTy = cast<VectorType>(Ty);
  if (!ST->hasSIMD128() || !isa<FixedVectorType>(VecTy)) {
    return InstructionCost::getInvalid();
  }

  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, Ty, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  constexpr unsigned MaxInterleaveFactor = 4;
  if (Factor <= MaxInterleaveFactor) {
    unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
    // Ensure the number of vector elements is greater than 1.
    if (MinElts < 2 || MinElts % Factor != 0)
      return InstructionCost::getInvalid();

    unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
    // Ensure the element type is legal.
    if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
      return InstructionCost::getInvalid();

    if (Factor != 2 && Factor != 4)
      return InstructionCost::getInvalid();

    auto *SubVecTy =
        VectorType::get(VecTy->getElementType(),
                        VecTy->getElementCount().divideCoefficientBy(Factor));
    InstructionCost MemCost =
        getMemoryOpCost(Opcode, SubVecTy, Alignment, AddressSpace, CostKind);

    unsigned VecSize = DL.getTypeSizeInBits(SubVecTy);
    unsigned MaxVecSize = 128;
    unsigned NumAccesses =
        std::max<unsigned>(1, (MinElts * ElSize + MaxVecSize - 1) / VecSize);

    // A stride of two is commonly supported via dedicated instructions, so it
    // should be relatively cheap for all element sizes. A stride of four is
    // more expensive as it will likely require more shuffles. Using two
    // simd128 inputs is considered more expensive and we mainly account for
    // shuffling two inputs (32 bytes), but we do model 4 x v4i32 to enable
    // arithmetic kernels with smaller (i8/i16) inputs.
    static const CostTblEntry ShuffleCostTbl[] = {
        // One reg.
        {2, MVT::v2i8, 1},  // interleave 2 x 2i8 into 4i8
        {2, MVT::v4i8, 1},  // interleave 2 x 4i8 into 8i8
        {2, MVT::v8i8, 1},  // interleave 2 x 8i8 into 16i8
        {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16
        {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16
        {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32
        {2, MVT::v2f32, 1}, // interleave 2 x 2f32 into 4f32

        // Two regs.
        {2, MVT::v16i8, 2}, // interleave 2 x 16i8 into 32i8
        {2, MVT::v8i16, 2}, // interleave 2 x 8i16 into 16i16
        {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32
        {2, MVT::v4f32, 2}, // interleave 2 x 4f32 into 8f32

        // One reg.
        {4, MVT::v2i8, 4},  // interleave 4 x 2i8 into 8i8
        {4, MVT::v4i8, 4},  // interleave 4 x 4i8 into 16i8
        {4, MVT::v2i16, 4}, // interleave 4 x 2i16 into 8i16

        // Two regs.
        {4, MVT::v8i8, 16}, // interleave 4 x 8i8 into 32i8
        {4, MVT::v4i16, 8}, // interleave 4 x 4i16 into 16i16
        {4, MVT::v2i32, 4}, // interleave 4 x 2i32 into 8i32
        {4, MVT::v2f32, 4}, // interleave 4 x 2f32 into 8f32

        // Four regs.
        {4, MVT::v4i32, 16}, // interleave 4 x 4i32 into 16i32
    };

    EVT ETy = TLI->getValueType(DL, SubVecTy);
    // The table is keyed on simple value types, so only query it when the
    // sub-vector maps to one (e.g. an odd element count may not). This
    // matches the isSimple() guards used by the other cost-table lookups in
    // this file.
    if (ETy.isSimple()) {
      if (const auto *Entry =
              CostTableLookup(ShuffleCostTbl, Factor, ETy.getSimpleVT()))
        return Entry->Cost + (NumAccesses * MemCost);
    }
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
}


/// Estimate the cost of a vector insert/extract at lane \p Index.
///
/// Starts from the base implementation's cost and adds a large penalty
/// (25 * TCC_Expensive) when \p Index is -1u, i.e. the lane is not a known
/// constant.
InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(
    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
    const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const {
  const InstructionCost GenericCost = BasicTTIImplBase::getVectorInstrCost(
      Opcode, Val, CostKind, Index, Op0, Op1, VIC);

  const bool HasConstantIndex = Index != -1u;
  if (HasConstantIndex)
    return GenericCost;

  // SIMD128's insert/extract currently only take constant indices.
  return GenericCost + 25 * TargetTransformInfo::TCC_Expensive;
}


/// Estimate the cost of a partial reduction.
///
/// A valid cost is only produced for fixed-width add reductions on SIMD128,
/// costed for reciprocal throughput, that accumulate into i32 from extended
/// 8 x i16 or 16 x i8 inputs. Without a binary op the cost is TCC_Basic. With
/// one, it must be a multiply of identically typed and identically extended
/// inputs; the cost is then scaled by the input shape and signedness. Every
/// other combination yields an invalid cost.
InstructionCost WebAssemblyTTIImpl::getPartialReductionCost(
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
    ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
    TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
    TTI::TargetCostKind CostKind, std::optional<FastMathFlags> FMF) const {
  const InstructionCost Unsupported = InstructionCost::getInvalid();

  if (!VF.isFixed() || !ST->hasSIMD128() ||
      CostKind != TTI::TCK_RecipThroughput || Opcode != Instruction::Add)
    return Unsupported;

  // TODO: Add i64 accumulator.
  EVT AccumEVT = EVT::getEVT(AccumType);
  if (AccumEVT != MVT::i32)
    return Unsupported;

  // Possible options:
  // - i16x8.extadd_pairwise_i8x16_sx
  // - i32x4.extadd_pairwise_i16x8_sx
  // - i32x4.dot_i16x8_s
  // Only try to support dot, for now.
  EVT InputEVT = EVT::getEVT(InputTypeA);
  const bool IsI16x8 = InputEVT == MVT::i16 && VF.getFixedValue() == 8;
  const bool IsI8x16 = InputEVT == MVT::i8 && VF.getFixedValue() == 16;
  if (!IsI16x8 && !IsI8x16)
    return Unsupported;

  if (OpAExtend == TTI::PR_None)
    return Unsupported;

  InstructionCost Cost(TTI::TCC_Basic);
  if (!BinOp)
    return Cost;

  // With a binary op, only a multiply of matching operands is modelled.
  if (OpAExtend != OpBExtend || *BinOp != Instruction::Mul ||
      InputTypeA != InputTypeB)
    return Unsupported;

  const bool IsSigned = OpAExtend == TTI::PR_SignExtend;

  // Signed inputs can lower to dot
  if (IsI16x8)
    return IsSigned ? Cost : Cost * 2;

  // Double the size of the lowered sequence.
  if (IsI8x16)
    return IsSigned ? Cost * 2 : Cost * 4;

  return Unsupported;
}


/// Choose the shuffle pattern to use when expanding the reduction intrinsic
/// \p II: pairwise for vector_reduce_fadd, split-half for everything else.
TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
    const IntrinsicInst *II) const {
  const bool IsFAddReduction =
      II->getIntrinsicID() == Intrinsic::vector_reduce_fadd;
  if (IsFAddReduction)
    return TTI::ReductionShuffle::Pairwise;

  return TTI::ReductionShuffle::SplitHalf;
}


/// Fill in the unrolling preferences \p UP for loop \p L.
///
/// \p UP is left untouched if the loop contains a call or invoke whose known
/// callee is lowered to a real call. Otherwise partial, runtime and
/// upper-bound unrolling are enabled with a partial threshold of 30, both
/// size-optimization thresholds are set to 0, and BEInsns is set to 2.
void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *Block : L->blocks()) {
    for (Instruction &Inst : *Block) {
      if (!isa<CallInst>(Inst) && !isa<InvokeInst>(Inst))
        continue;

      // Calls without a known callee are not considered here.
      const Function *Callee = cast<CallBase>(Inst).getCalledFunction();
      if (Callee && isLoweredToCall(Callee))
        return;
    }
  }

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = true;
  UP.Runtime = true;
  UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // Set number of instructions optimized when "back edge"
  // becomes "fall through" to default value of 2.
  UP.BEInsns = 2;
}


/// Return true when the subtarget reports the tail-call feature.
bool WebAssemblyTTIImpl::supportsTailCalls() const {
  const bool HasTailCall = getST()->hasTailCall();
  return HasTailCall;
}


/// Decide whether operands of \p I should be sunk next to it.
///
/// Only vector shifts qualify, and only when the shift amount is a
/// non-constant splat built as a zero-mask shuffle of an insertelement into
/// lane 0. In that case the insert and the shuffle uses are appended to
/// \p Ops (in that order) and true is returned; otherwise \p Ops is left
/// unchanged and false is returned.
bool WebAssemblyTTIImpl::isProfitableToSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  const bool IsVectorShift = I->getType()->isVectorTy() && I->isShift();
  if (!IsVectorShift)
    return false;

  Value *ShiftAmount = I->getOperand(1);
  // We don't need to sink constant splat.
  if (isa<Constant>(ShiftAmount))
    return false;

  const bool IsSplatOfInsert =
      match(ShiftAmount,
            m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
                      m_Value(), m_ZeroMask()));
  if (!IsSplatOfInsert)
    return false;

  // Sink insert
  Ops.push_back(&cast<Instruction>(ShiftAmount)->getOperandUse(0));
  // Sink shuffle
  Ops.push_back(&I->getOperandUse(1));
  return true;
}


/// Try to rewrite a [relaxed_]swizzle intrinsic call as a shufflevector.
///
/// This only succeeds when the second argument is a constant whose elements
/// are all integer constants or undef. Returns the new shuffle, or nullptr
/// when the call cannot be rewritten. \p IsRelaxed selects the
/// relaxed_swizzle rules for out-of-range lane indices.
static Value *simplifyWasmSwizzle(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder,
                                  bool IsRelaxed) {
  auto *MaskConst = dyn_cast<Constant>(II.getArgOperand(1));
  if (!MaskConst)
    return nullptr;

  auto *ResultTy = cast<FixedVectorType>(II.getType());
  unsigned LaneCount = ResultTy->getNumElements();
  assert(LaneCount == 16);

  // Construct a shuffle mask from constant integers or UNDEFs.
  int ShuffleMask[16];
  bool SawOutOfBounds = false;

  for (unsigned Lane = 0; Lane != LaneCount; ++Lane) {
    Constant *Elt = MaskConst->getAggregateElement(Lane);
    if (!Elt)
      return nullptr;

    // Undef lanes become "don't care" entries in the shuffle mask.
    if (isa<UndefValue>(Elt)) {
      ShuffleMask[Lane] = -1;
      continue;
    }

    auto *LaneIndex = dyn_cast<ConstantInt>(Elt);
    if (!LaneIndex)
      return nullptr;

    // The relaxed_swizzle operation always returns 0 if the lane index is
    // less than 0 when interpreted as a signed value. For lane indices above
    // 15, however, it can choose between returning 0 or the lane at `Index %
    // 16`. However, the choice must be made consistently. As the WebAssembly
    // spec states:
    //
    // "The result of relaxed operators are implementation-dependent, because
    // the set of possible results may depend on properties of the host
    // environment, such as its hardware. Technically, their behaviour is
    // controlled by a set of global parameters to the semantics that an
    // implementation can instantiate in different ways. These choices are
    // fixed, that is, parameters are constant during the execution of any
    // given program."
    //
    // The WebAssembly runtime may choose differently from us, so we can't
    // optimize a relaxed swizzle with lane indices above 15.
    if (IsRelaxed && LaneIndex->getSExtValue() >= LaneCount)
      return nullptr;

    uint64_t Selected = LaneIndex->getZExtValue();
    if (Selected < LaneCount) {
      ShuffleMask[Lane] = Selected;
      continue;
    }

    // If there are out-of-bounds indices, the swizzle instruction returns
    // zeroes in those lanes. We'll provide an all-zeroes vector as the
    // second argument to shufflevector and read the first element from it.
    SawOutOfBounds = true;
    ShuffleMask[Lane] = LaneCount;
  }

  auto *FirstVec = II.getArgOperand(0);
  // The second shuffle input is only read for out-of-bounds lanes.
  auto *SecondVec = SawOutOfBounds ? Constant::getNullValue(ResultTy)
                                   : PoisonValue::get(ResultTy);

  return Builder.CreateShuffleVector(FirstVec, SecondVec,
                                     ArrayRef(ShuffleMask, LaneCount));
}


/// Target hook for InstCombine on WebAssembly intrinsics.
///
/// Only wasm_swizzle and wasm_relaxed_swizzle are handled: when the call can
/// be simplified to a shuffle, its uses are replaced and the result of
/// replaceInstUsesWith is returned. In every other case std::nullopt is
/// returned.
std::optional<Instruction *>
WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                         IntrinsicInst &II) const {
  const Intrinsic::ID IID = II.getIntrinsicID();
  const bool IsRelaxedSwizzle = IID == Intrinsic::wasm_relaxed_swizzle;
  const bool IsSwizzle = IID == Intrinsic::wasm_swizzle || IsRelaxedSwizzle;
  if (!IsSwizzle)
    return std::nullopt;

  Value *Replacement = simplifyWasmSwizzle(II, IC.Builder, IsRelaxedSwizzle);
  if (!Replacement)
    return std::nullopt;

  return IC.replaceInstUsesWith(II, Replacement);
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

CostKind
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

CostTable.h
Cost tables and simple lookup functions.

MaxVecSize
static const int MaxVecSize
Definition DXILDataScalarization.cpp:28

IntrinsicInst.h

InstCombiner.h
This file provides the interface for the instcombine pass implementation.

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3391

Options
static LVOptions Options
Definition LVOptions.cpp:25

MaxInterleaveFactor
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
Definition LoopVectorizationLegality.cpp:83

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

getCalledFunction
static const Function * getCalledFunction(const Value *V)
Definition MemoryBuiltins.cpp:157

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

simplifyWasmSwizzle
static Value * simplifyWasmSwizzle(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsRelaxed)
Attempt to convert [relaxed_]swizzle to shufflevector if the mask is constant.
Definition WebAssemblyTargetTransformInfo.cpp:552

WebAssemblyTargetTransformInfo.h
This file defines a TargetTransformInfoImplBase conforming object specific to the WebAssembly target machine.

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicTTIImplBase< WebAssemblyTTIImpl >::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
Definition BasicTTIImpl.h:1614

llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition BasicTTIImpl.h:1064

llvm::BasicTTIImplBase< WebAssemblyTTIImpl >::improveShuffleKindFromMask
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
Definition BasicTTIImpl.h:1142

llvm::BasicTTIImplBase< WebAssemblyTTIImpl >::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition BasicTTIImpl.h:1198

llvm::BasicTTIImplBase< WebAssemblyTTIImpl >::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1227

llvm::BasicTTIImplBase< WebAssemblyTTIImpl >::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Definition BasicTTIImpl.h:1025

llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition BasicTTIImpl.h:1477

llvm::BasicTTIImplBase< WebAssemblyTTIImpl >::DL
const DataLayout & DL

llvm::BasicTTIImplBase< WebAssemblyTTIImpl >::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1563

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Constant::getNullValue
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition Constants.cpp:367

llvm::Constant::getAggregateElement
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition Constants.cpp:446

llvm::ElementCount
Definition TypeSize.h:298

llvm::Function
Definition Function.h:65

llvm::InstCombiner
The core instruction combiner logic.
Definition InstCombiner.h:49

llvm::InstCombiner::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition InstCombiner.h:423

llvm::InstCombiner::BuilderTy
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
Definition InstCombiner.h:75

llvm::InstCombiner::Builder
BuilderTy Builder
Definition InstCombiner.h:76

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition InstructionCost.h:82

llvm::Instruction
Definition Instruction.h:70

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition IntrinsicInst.h:49

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition OptimizationRemarkEmitter.h:33

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition Constants.cpp:2034

llvm::ScalarEvolution
The main scalar evolution driver.
Definition ScalarEvolution.h:616

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:592

llvm::TargetTransformInfoImplBase::getNumberOfRegisters
virtual unsigned getNumberOfRegisters(unsigned ClassID) const
Definition TargetTransformInfoImpl.h:612

llvm::TargetTransformInfoImplBase::isLoweredToCall
virtual bool isLoweredToCall(const Function *F) const
Definition TargetTransformInfoImpl.h:221

llvm::TargetTransformInfo::ReductionShuffle
ReductionShuffle
Definition TargetTransformInfo.h:1983

llvm::TargetTransformInfo::ReductionShuffle::Pairwise
@ Pairwise
Definition TargetTransformInfo.h:1983

llvm::TargetTransformInfo::ReductionShuffle::SplitHalf
@ SplitHalf
Definition TargetTransformInfo.h:1983

llvm::TargetTransformInfo::VectorInstrContext
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
Definition TargetTransformInfo.h:1068

llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition TargetTransformInfo.h:331

llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition TargetTransformInfo.h:332

llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of instruction.
Definition TargetTransformInfo.h:333

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition TargetTransformInfo.h:1352

llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition TargetTransformInfo.h:1352

llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition TargetTransformInfo.h:1352

llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition TargetTransformInfo.h:1352

llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition TargetTransformInfo.h:822

llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition TargetTransformInfo.h:822

llvm::TargetTransformInfo::PartialReductionExtendKind
PartialReductionExtendKind
Definition TargetTransformInfo.h:270

llvm::TargetTransformInfo::PR_SignExtend
@ PR_SignExtend
Definition TargetTransformInfo.h:272

llvm::TargetTransformInfo::PR_None
@ PR_None
Definition TargetTransformInfo.h:271

llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition TargetTransformInfo.h:359

llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition TargetTransformInfo.h:358

llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition TargetTransformInfo.h:1248

llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition TargetTransformInfo.h:1249

llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition TargetTransformInfo.h:1586

llvm::TypeSize
Definition TypeSize.h:332

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343

llvm::TypeSize::getScalable
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:490

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.

llvm::WebAssemblyTTIImpl::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const override
Definition WebAssemblyTargetTransformInfo.cpp:321

llvm::WebAssemblyTTIImpl::getPartialReductionCost
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
Definition WebAssemblyTargetTransformInfo.cpp:424

llvm::WebAssemblyTTIImpl::supportsTailCalls
bool supportsTailCalls() const override
Definition WebAssemblyTargetTransformInfo.cpp:522

llvm::WebAssemblyTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition WebAssemblyTargetTransformInfo.cpp:250

llvm::WebAssemblyTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
Definition WebAssemblyTargetTransformInfo.cpp:26

llvm::WebAssemblyTTIImpl::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition WebAssemblyTargetTransformInfo.cpp:618

llvm::WebAssemblyTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
Definition WebAssemblyTargetTransformInfo.cpp:42

llvm::WebAssemblyTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition WebAssemblyTargetTransformInfo.cpp:56

llvm::WebAssemblyTTIImpl::isProfitableToSinkOperands
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Definition WebAssemblyTargetTransformInfo.cpp:526

llvm::WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override
Definition WebAssemblyTargetTransformInfo.cpp:483

llvm::WebAssemblyTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition WebAssemblyTargetTransformInfo.cpp:495

llvm::WebAssemblyTTIImpl::enableMemCmpExpansion
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
Definition WebAssemblyTargetTransformInfo.cpp:235

llvm::WebAssemblyTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition WebAssemblyTargetTransformInfo.cpp:411

llvm::WebAssemblyTTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition WebAssemblyTargetTransformInfo.cpp:105

llvm::WebAssemblyTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition WebAssemblyTargetTransformInfo.cpp:302

llvm::WebAssemblyTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const override
Definition WebAssemblyTargetTransformInfo.cpp:31

llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200

llvm::details::FixedOrScalableQuantity::isFixed
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171

uint64_t

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::ISD
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
Definition ISDOpcodes.h:24

llvm::ISD::STORE
@ STORE
Definition ISDOpcodes.h:1183

llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition ISDOpcodes.h:889

llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition ISDOpcodes.h:1182

llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:888

llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852

llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition ISDOpcodes.h:935

llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858

llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:934

llvm::ISD::MUL
@ MUL
Definition ISDOpcodes.h:266

llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
Definition MIPatternMatch.h:278

llvm::NVPTXAS::AddressSpace
AddressSpace
Definition NVPTXAddrSpace.h:21

llvm::PatternMatch
Definition PatternMatch.h:51

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:53

llvm::PatternMatch::m_Value
auto m_Value()
Match an arbitrary value and ignore it.
Definition PatternMatch.h:135

llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition PatternMatch.h:2031

llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition PatternMatch.h:1945

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::CostTableLookup
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
Definition CostTable.h:35

llvm::Cost
InstructionCost Cost
Definition FunctionSpecialization.h:103

llvm::TypeConversionCostTblEntry
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
Definition CostTable.h:61

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::WinX64EHUnwindMode::V1
@ V1
Definition CodeGen.h:171

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::CostTblEntry
CostTblEntryT< unsigned > CostTblEntry
Definition CostTable.h:30

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::VFParamKind::Vector
@ Vector
Definition VFABIDemangler.h:27

llvm::Invalid
@ Invalid
Definition PGOCtxProfWriter.h:24

llvm::ConvertCostTableLookup
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
Definition CostTable.h:66

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::EVT
Extended Value Type.
Definition ValueTypes.h:35

llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145

llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396

llvm::EVT::getEVT
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition ValueTypes.cpp:307

llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339

llvm::PatternMatch::m_ZeroMask
Definition PatternMatch.h:1984

llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Definition TargetTransformInfo.h:1110

llvm::TargetTransformInfo::OperandValueInfo
Definition TargetTransformInfo.h:1285

llvm::TargetTransformInfo::OperandValueInfo::isUniform
bool isUniform() const
Definition TargetTransformInfo.h:1292

llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition TargetTransformInfo.h:638

llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition TargetTransformInfo.h:709

llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition TargetTransformInfo.h:667

llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition TargetTransformInfo.h:692

llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition TargetTransformInfo.h:663

llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition TargetTransformInfo.h:699

llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition TargetTransformInfo.h:695

llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition TargetTransformInfo.h:660