LLVM 23.0.0git
ARMTargetTransformInfo.h
Go to the documentation of this file.
1//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file defines a TargetTransformInfoImplBase conforming object specific
11/// to the ARM target machine. It uses the target's detailed information to
12/// provide more precise answers to certain TTI queries, while letting the
13/// target independent and default TTI implementations handle the rest.
14//
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
18#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
19
20#include "ARM.h"
21#include "ARMSubtarget.h"
22#include "ARMTargetMachine.h"
23#include "llvm/ADT/ArrayRef.h"
26#include "llvm/IR/Constant.h"
27#include "llvm/IR/Function.h"
29#include <optional>
30
31namespace llvm {
32
33class APInt;
35class Instruction;
36class Loop;
37class SCEV;
38class ScalarEvolution;
39class Type;
40class Value;
41
51
52// For controlling conversion of memcpy into Tail Predicated loop.
53namespace TPLoop {
55}
56
57class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
58 using BaseT = BasicTTIImplBase<ARMTTIImpl>;
59 using TTI = TargetTransformInfo;
60
61 friend BaseT;
62
63 const ARMSubtarget *ST;
64 const ARMTargetLowering *TLI;
65
/// Accessor for the ARM subtarget pointer held in ST.
66 const ARMSubtarget *getST() const { return ST; }
/// Accessor for the ARM target-lowering pointer held in TLI.
67 const ARMTargetLowering *getTLI() const { return TLI; }
68
69public:
/// Constructs the ARM TTI implementation for function \p F. The base class is
/// initialised with \p TM and F's data layout, ST caches the subtarget that
/// \p TM returns for \p F, and TLI caches that subtarget's target lowering.
/// TLI is computed from ST, which relies on ST being declared before TLI in
/// this class so that it is initialised first.
70 explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
71 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
72 TLI(ST->getTargetLowering()) {}
73
/// Unconditionally answers true: this query does not depend on the subtarget.
74 bool enableInterleavedAccessVectorization() const override { return true; }
75
77 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override;
78
79 /// Floating-point computation using ARMv8 AArch32 Advanced
80 /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
81 /// and Arm MVE are IEEE-754 compliant.
82 bool isFPVectorizationPotentiallyUnsafe() const override {
83 return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
84 }
85
86 std::optional<Instruction *>
88 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
89 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
90 APInt &UndefElts2, APInt &UndefElts3,
91 std::function<void(Instruction *, unsigned, APInt, APInt &)>
92 SimplifyAndSetOp) const override;
93
94 /// \name Scalar TTI Implementations
95 /// @{
96
97 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
98 const APInt &Imm,
99 Type *Ty) const override;
100
102 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
103 TTI::TargetCostKind CostKind) const override;
104
105 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
106 const APInt &Imm, Type *Ty,
108 Instruction *Inst = nullptr) const override;
109
110 /// @}
111
112 /// \name Vector TTI Implementations
113 /// @{
114
115 unsigned getNumberOfRegisters(unsigned ClassID) const override {
116 bool Vector = (ClassID == 1);
117 if (Vector) {
118 if (ST->hasNEON())
119 return 16;
120 if (ST->hasMVEIntegerOps())
121 return 8;
122 return 0;
123 }
124
125 if (ST->isThumb1Only())
126 return 8;
127 return 13;
128 }
129
132 switch (K) {
134 return TypeSize::getFixed(32);
136 if (ST->hasNEON())
137 return TypeSize::getFixed(128);
138 if (ST->hasMVEIntegerOps())
139 return TypeSize::getFixed(128);
140 return TypeSize::getFixed(0);
142 return TypeSize::getScalable(0);
143 }
144 llvm_unreachable("Unsupported register kind");
145 }
146
148 bool HasUnorderedReductions) const override {
149 return ST->getMaxInterleaveFactor();
150 }
151
152 bool isProfitableLSRChainElement(Instruction *I) const override;
153
154 bool
155 isLegalMaskedLoad(Type *DataTy, Align Alignment, unsigned AddressSpace,
156 TTI::MaskKind MaskKind =
158
159 bool
160 isLegalMaskedStore(Type *DataTy, Align Alignment, unsigned AddressSpace,
161 TTI::MaskKind MaskKind =
163 return isLegalMaskedLoad(DataTy, Alignment, AddressSpace, MaskKind);
164 }
165
167 Align Alignment) const override {
168 // For MVE, we have a custom lowering pass that will already have custom
169 // legalised any gathers that we can lower to MVE intrinsics, and want to
170 // expand all the rest. The pass runs before the masked intrinsic lowering
171 // pass.
172 return true;
173 }
174
176 Align Alignment) const override {
177 return forceScalarizeMaskedGather(VTy, Alignment);
178 }
179
180 bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;
181
/// Masked-scatter legality is defined to be the same as masked-gather
/// legality: the query is forwarded unchanged to isLegalMaskedGather with the
/// same type and alignment.
182 bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override {
183 return isLegalMaskedGather(Ty, Alignment);
184 }
185
186 InstructionCost getMemcpyCost(const Instruction *I) const override;
187
189 return ST->getMaxInlineSizeThreshold();
190 }
191
192 int getNumMemOps(const IntrinsicInst *I) const;
193
197 VectorType *SubTp, ArrayRef<const Value *> Args = {},
198 const Instruction *CxtI = nullptr) const override;
199
200 bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override;
201
202 bool preferPredicatedReductionSelect() const override;
203
/// Always answers false; \p II is not inspected.
204 bool shouldExpandReduction(const IntrinsicInst *II) const override {
205 return false;
206 }
207
209 const Instruction *I = nullptr) const override;
210
212 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
214 const Instruction *I = nullptr) const override;
215
217 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
221 const Instruction *I = nullptr) const override;
222
226 unsigned Index, const Value *Op0, const Value *Op1,
228 TTI::VectorInstrContext::None) const override;
229
231 getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr,
232 TTI::TargetCostKind CostKind) const override;
233
235 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
239 const Instruction *CxtI = nullptr) const override;
240
242 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
245 const Instruction *I = nullptr) const override;
246
248 getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
249 TTI::TargetCostKind CostKind) const override;
250
251 InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
253
255 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
256 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
257 bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
258
259 InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
261
263 getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
264 std::optional<FastMathFlags> FMF,
265 TTI::TargetCostKind CostKind) const override;
267 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
268 VectorType *ValTy, std::optional<FastMathFlags> FMF,
269 TTI::TargetCostKind CostKind) const override;
271 getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy,
272 VectorType *ValTy,
273 TTI::TargetCostKind CostKind) const override;
274
276 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
277 TTI::TargetCostKind CostKind) const override;
278
280 getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
281 TTI::TargetCostKind CostKind) const override;
282
284 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
286 TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
288 std::optional<FastMathFlags> FMF) const override {
290 }
291
292 /// getScalingFactorCost - Return the cost of the scaling used in
293 /// addressing mode represented by AM.
294 /// If the AM is supported, the return value must be >= 0.
295 /// If the AM is not supported, the return value is an invalid cost.
297 StackOffset BaseOffset, bool HasBaseReg,
298 int64_t Scale,
299 unsigned AddrSpace) const override;
300
301 bool maybeLoweredToCall(Instruction &I) const;
302 bool isLoweredToCall(const Function *F) const override;
305 HardwareLoopInfo &HWLoopInfo) const override;
306 bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override;
309 OptimizationRemarkEmitter *ORE) const override;
310
312
314 TTI::PeelingPreferences &PP) const override;
316 // In the ROPI and RWPI relocation models we can't have pointers to global
317 // variables or functions in constant data, so don't convert switches to
318 // lookup tables if any of the values would need relocation.
319 if (ST->isROPI() || ST->isRWPI())
320 return !C->needsDynamicRelocation();
321
322 return true;
323 }
324
325 bool shouldConsiderVectorizationRegPressure() const override;
326
327 bool hasArmWideBranch(bool Thumb) const override;
328
330 SmallVectorImpl<Use *> &Ops) const override;
331
332 unsigned getNumBytesToPadGlobalArray(unsigned Size,
333 Type *ArrayType) const override;
334
335 /// @}
336};
337
338/// isVREVMask - Check if a vector shuffle corresponds to a VREV
339/// instruction with the specified blocksize. (The order of the elements
340/// within each block of the vector is reversed.)
341inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
342 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
343 "Only possible block sizes for VREV are: 16, 32, 64");
344
345 unsigned EltSz = VT.getScalarSizeInBits();
346 if (EltSz != 8 && EltSz != 16 && EltSz != 32)
347 return false;
348
349 unsigned BlockElts = M[0] + 1;
350 // If the first shuffle index is UNDEF, be optimistic.
351 if (M[0] < 0)
352 BlockElts = BlockSize / EltSz;
353
354 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
355 return false;
356
357 for (unsigned i = 0, e = M.size(); i < e; ++i) {
358 if (M[i] < 0)
359 continue; // ignore UNDEF indices
360 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
361 return false;
362 }
363
364 return true;
365}
366
367} // end namespace llvm
368
369#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
static const int BlockSize
Definition TarWriter.cpp:33
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition APInt.h:78
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isFPVectorizationPotentiallyUnsafe() const override
Floating-point computation using ARMv8 AArch32 Advanced SIMD instructions remains unchanged from ARMv...
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getMemcpyCost(const Instruction *I) const override
bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override
bool maybeLoweredToCall(Instruction &I) const
bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *ValTy, TTI::TargetCostKind CostKind) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool hasArmWideBranch(bool Thumb) const override
bool shouldConsiderVectorizationRegPressure() const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
bool shouldBuildLookupTablesForConstant(Constant *C) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
int getNumMemOps(const IntrinsicInst *I) const
Given a memcpy/memset/memmove instruction, return the number of memory operations performed,...
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const override
bool isLoweredToCall(const Function *F) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
bool isLegalMaskedStore(Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind=TTI::MaskKind::VariableOrConstantMask) const override
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
unsigned getMaxInterleaveFactor(ElementCount VF, bool HasUnorderedReductions) const override
bool isLegalMaskedLoad(Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind=TTI::MaskKind::VariableOrConstantMask) const override
ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
TailFoldingStyle getPreferredTailFoldingStyle() const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) const override
unsigned getNumberOfRegisters(unsigned ClassID) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
bool preferPredicatedReductionSelect() const override
bool isLegalMaskedGather(Type *Ty, Align Alignment) const override
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const override
bool isProfitableLSRChainElement(Instruction *I) const override
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool enableInterleavedAccessVectorization() const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
A cache of @llvm.assume calls within a function.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
This is an important base class in LLVM.
Definition Constant.h:43
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
Provides information about what library functions are available for the current target.
virtual const DataLayout & getDataLayout() const
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
@ None
The insert/extract is not used with a load/store.
MaskKind
Some targets only support masked load/store with a constant mask.
TargetCostKind
The kind of cost model.
AddressingModeKind
Which addressing mode Loop Strength Reduction will try to generate.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
CastContextHint
Represents a hint about the context in which a cast is used.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
Base class of all SIMD vector types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
Attributes of a target dependent hardware loop.
Parameters that control the generic loop unrolling transformation.