LLVM 22.0.0git
ARMSelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ARMSelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
16using namespace llvm;
17
18#define DEBUG_TYPE "arm-selectiondag-info"
19
// Hidden command-line option "arm-memtransfer-tploop". It selects one of three
// policies for converting memcpy into a tail-predicated (WLSTP) loop:
// "force-disabled", "force-enabled" or "allow".
// NOTE(review): the opening line of this definition and the lines naming the
// ForceDisabled / Allow enumerators (source lines 20, 24, 25, 29) are not
// present in this listing.
21 "arm-memtransfer-tploop", cl::Hidden,
22 cl::desc("Control conversion of memcpy to "
23 "Tail predicated loops (WLSTP)"),
26 "Don't convert memcpy to TP loop."),
27 clEnumValN(TPLoop::ForceEnabled, "force-enabled",
28 "Always convert memcpy to TP loop."),
30 "Allow (may be subject to certain conditions) "
31 "conversion of memcpy to TP loop.")));
32
// Range check on a target-specific opcode: the visible half of the condition
// requires Opcode to be at or above ARMISD::FIRST_MEMORY_OPCODE.
// NOTE(review): the second operand of the && (source line 35) is missing from
// this listing; presumably it is the matching upper bound -- confirm.
33bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
34 return Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
36}
37
38// Emit, if possible, a specialized version of the given Libcall. Typically this
39// means selecting the appropriately aligned version, but we also convert memset
40// of 0 into memclr.
//
// Parameters:
//   DAG, dl, Chain - DAG being built, debug location, and incoming chain.
//   Dst, Src, Size - operands of the memory operation; for MEMSET, Src is the
//                    fill value rather than a pointer.
//   Align          - alignment as a plain integer; only its low three bits are
//                    inspected to choose the 1/4/8-byte variant.
//   LC             - RTLIB::MEMCPY, RTLIB::MEMMOVE or RTLIB::MEMSET.
//
// Returns an empty SDValue when LC is none of the three libcalls above or when
// the libcall implementation reported by the target lowering is not the
// matching __aeabi_* one. Otherwise returns the second element of the pair
// produced by LowerCallTo (presumably the call's output chain -- confirm).
//
// NOTE(review): the function header (source line 41) and source lines 45, 98,
// 115-116, 132, 135 and 140 are missing from this listing.
42 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
43 SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
44 const ARMSubtarget &Subtarget =
46 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
47
48 // Only use a specialized AEABI function if the default version of this
49 // Libcall is an AEABI function.
50 //
51 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
52 // able to translate memset to memclr and use the value to index the function
53 // name array.
54 enum {
55 AEABI_MEMCPY = 0,
56 AEABI_MEMMOVE,
57 AEABI_MEMSET,
58 AEABI_MEMCLR
59 } AEABILibcall;
60 switch (LC) {
61 case RTLIB::MEMCPY:
62 if (TLI->getLibcallImpl(LC) != RTLIB::impl___aeabi_memcpy)
63 return SDValue();
64
65 AEABILibcall = AEABI_MEMCPY;
66 break;
67 case RTLIB::MEMMOVE:
68 if (TLI->getLibcallImpl(LC) != RTLIB::impl___aeabi_memmove)
69 return SDValue();
70
71 AEABILibcall = AEABI_MEMMOVE;
72 break;
73 case RTLIB::MEMSET:
74 if (TLI->getLibcallImpl(LC) != RTLIB::impl___aeabi_memset)
75 return SDValue();
76
77 AEABILibcall = AEABI_MEMSET;
   // A memset whose fill value is the constant zero is routed to memclr.
78 if (isNullConstant(Src))
79 AEABILibcall = AEABI_MEMCLR;
80 break;
81 default:
82 return SDValue();
83 }
84
85 // Choose the most-aligned libcall variant that we can
86 enum {
87 ALIGN1 = 0,
88 ALIGN4,
89 ALIGN8
90 } AlignVariant;
91 if ((Align & 7) == 0)
92 AlignVariant = ALIGN8;
93 else if ((Align & 3) == 0)
94 AlignVariant = ALIGN4;
95 else
96 AlignVariant = ALIGN1;
97
   // Build the argument list. Dst always comes first; the remaining arguments
   // depend on the libcall: memclr takes only Size, memset takes (Size, value),
   // memcpy/memmove take (Src, Size).
99 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
100 Args.emplace_back(Dst, IntPtrTy);
101 if (AEABILibcall == AEABI_MEMCLR) {
102 Args.emplace_back(Size, IntPtrTy);
103 } else if (AEABILibcall == AEABI_MEMSET) {
104 // Adjust parameters for memset, EABI uses format (ptr, size, value),
105 // GNU library uses (ptr, value, size)
106 // See RTABI section 4.3.4
107 Args.emplace_back(Size, IntPtrTy);
108
109 // Extend or truncate the argument to be an i32 value for the call.
110 if (Src.getValueType().bitsGT(MVT::i32))
111 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
112 else if (Src.getValueType().bitsLT(MVT::i32))
113 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
114
    // NOTE(review): the construction of Entry (source lines 115-116) is missing
    // from this listing.
117 Entry.IsSExt = false;
118 Args.push_back(Entry);
119 } else {
120 Args.emplace_back(Src, IntPtrTy);
121 Args.emplace_back(Size, IntPtrTy);
122 }
123
   // Lookup table indexed as [AEABILibcall][AlignVariant]; the row and column
   // order must match the two enums declared above.
124 static const RTLIB::Libcall FunctionImpls[4][3] = {
125 {RTLIB::MEMCPY, RTLIB::AEABI_MEMCPY4, RTLIB::AEABI_MEMCPY8},
126 {RTLIB::MEMMOVE, RTLIB::AEABI_MEMMOVE4, RTLIB::AEABI_MEMMOVE8},
127 {RTLIB::MEMSET, RTLIB::AEABI_MEMSET4, RTLIB::AEABI_MEMSET8},
128 {RTLIB::AEABI_MEMCLR, RTLIB::AEABI_MEMCLR4, RTLIB::AEABI_MEMCLR8}};
129
130 RTLIB::Libcall NewLC = FunctionImpls[AEABILibcall][AlignVariant];
131
   // Lower a call to the selected libcall with a void return type, using the
   // calling convention and symbol name the target lowering reports for NewLC.
133 CLI.setDebugLoc(dl)
134 .setChain(Chain)
136 TLI->getLibcallCallingConv(NewLC), Type::getVoidTy(*DAG.getContext()),
137 DAG.getExternalSymbol(TLI->getLibcallName(NewLC),
138 TLI->getPointerTy(DAG.getDataLayout())),
139 std::move(Args))
141 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
142
143 return CallResult.second;
144}
145
// Decides whether a memcpy (IsMemcpy == true) or memset (IsMemcpy == false)
// should be emitted as an inline tail-predicated (TP) loop.
//
// After the two early returns at the top, the visible rules are:
//   - never for functions marked optnone or optimized for size;
//   - always for memset;
//   - for memcpy with a non-constant size, only when Alignment >= 4;
//   - for memcpy with a constant size, only when the size exceeds
//     Subtarget.getMaxInlineSizeThreshold() and is below a second bound.
146static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
147 const SelectionDAG &DAG,
148 ConstantSDNode *ConstantSize,
149 Align Alignment, bool IsMemcpy) {
150 auto &F = DAG.getMachineFunction().getFunction();
    // NOTE(review): the conditions guarding the next two returns (source lines
    // 151 and 153) are missing from this listing; presumably they test the
    // arm-memtransfer-tploop option for force-disabled and force-enabled
    // respectively -- confirm.
152 return false;
154 return true;
155 // Do not generate inline TP loop if optimizations are disabled,
156 // or if optimization for size (-Os or -Oz) is on.
157 if (F.hasOptNone() || F.hasOptSize())
158 return false;
159 // If cli option is unset, for memset always generate inline TP.
160 // For memcpy, check some conditions
161 if (!IsMemcpy)
162 return true;
163 if (!ConstantSize && Alignment >= Align(4))
164 return true;
    // NOTE(review): the right-hand side of the '<' comparison (source line 168)
    // is missing from this listing; presumably it is the subtarget's memcpy
    // TP-loop inline size threshold -- confirm.
165 if (ConstantSize &&
166 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
167 ConstantSize->getZExtValue() <
169 return true;
170 return false;
171}
172
// Emit target-specific code that performs a memcpy.
//
// Strategy, in the order the checks appear below:
//   1. With MVE integer ops, and if shouldGenerateInlineTPLoop agrees, emit a
//      single ARMISD::MEMCPYLOOP node.
//   2. If Alignment is below 4, return an empty SDValue.
//   3. If the size is not a constant, or it exceeds the subtarget's inline
//      threshold and AlwaysInline is false, defer to EmitSpecializedLibcall.
//   4. Otherwise emit ARMISD::MEMCPY nodes for the whole 4-byte words, followed
//      by i16/i8 loads and stores for the trailing 1-3 bytes.
// Step 4 returns an empty SDValue instead when the subtarget has MinSize and
// more than one MEMCPY node would be needed.
//
// NOTE(review): the function header (source line 173) and the definition of
// ConstantSize (source lines 178-179) are missing from this listing.
// isVolatile is not referenced in the visible body.
174 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
175 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
176 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
177 const ARMSubtarget &Subtarget =
180
181 if (Subtarget.hasMVEIntegerOps() &&
182 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true))
183 return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src,
184 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
185
186 // Do repeated 4-byte loads and stores. To be improved.
187 // This requires 4-byte alignment.
188 if (Alignment < Align(4))
189 return SDValue();
190 // This requires the copy size to be a constant, preferably
191 // within a subtarget-specific limit.
192 if (!ConstantSize)
193 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
194 Alignment.value(), RTLIB::MEMCPY);
195 uint64_t SizeVal = ConstantSize->getZExtValue();
196 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
197 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
198 Alignment.value(), RTLIB::MEMCPY);
199
    // Split the size into whole 4-byte words (NumMemOps) and a 0-3 byte tail
    // (BytesLeft).
200 unsigned BytesLeft = SizeVal & 3;
201 unsigned NumMemOps = SizeVal >> 2;
202 unsigned EmittedNumMemOps = 0;
203 EVT VT = MVT::i32;
204 unsigned VTSize = 4;
205 unsigned i = 0;
206 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
207 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
    // Scratch arrays for the tail copy below; at most two entries are used
    // because the tail is at most 3 bytes (one i16 plus one i8).
208 SDValue TFOps[6];
209 SDValue Loads[6];
210 uint64_t SrcOff = 0, DstOff = 0;
211
212 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
213 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
214 // pressure on the general purpose registers. However this seems harder to map
215 // onto the register allocator's view of the world.
216
217 // The number of MEMCPY pseudo-instructions to emit. We use up to
218 // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
219 // later on. This is a lower bound on the number of MEMCPY operations we must
220 // emit.
221 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
222
223 // Code size optimisation: do not inline memcpy if expansion results in
224 // more instructions than the library call.
225 if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
226 return SDValue();
227 }
228
    // Each MEMCPY node produces four results; results 0 and 1 are read back as
    // the new Dst and Src, and result 2 as the new chain.
229 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
230
231 for (unsigned I = 0; I != NumMEMCPYs; ++I) {
232 // Evenly distribute registers among MEMCPY operations to reduce register
233 // pressure.
234 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
235 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
236
237 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
238 DAG.getConstant(NumRegs, dl, MVT::i32));
239 Src = Dst.getValue(1);
240 Chain = Dst.getValue(2);
241
     // Keep the pointer infos in step with the bytes copied so far; VTSize is
     // still 4 here.
242 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
243 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
244
245 EmittedNumMemOps = NextEmittedNumMemOps;
246 }
247
248 if (BytesLeft == 0)
249 return Chain;
250
251 // Issue loads / stores for the trailing (1 - 3) bytes.
252 auto getRemainingValueType = [](unsigned BytesLeft) {
253 return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
254 };
255 auto getRemainingSize = [](unsigned BytesLeft) {
256 return (BytesLeft >= 2) ? 2 : 1;
257 };
258
    // First pass: emit every tail load, then join their chains with a
    // TokenFactor so all loads precede the stores emitted in the second pass.
259 unsigned BytesLeftSave = BytesLeft;
260 i = 0;
261 while (BytesLeft) {
262 VT = getRemainingValueType(BytesLeft);
263 VTSize = getRemainingSize(BytesLeft);
264 Loads[i] = DAG.getLoad(VT, dl, Chain,
265 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
266 DAG.getConstant(SrcOff, dl, MVT::i32)),
267 SrcPtrInfo.getWithOffset(SrcOff));
268 TFOps[i] = Loads[i].getValue(1);
269 ++i;
270 SrcOff += VTSize;
271 BytesLeft -= VTSize;
272 }
273 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
274
    // Second pass: replay the same size sequence to store the loaded values.
275 i = 0;
276 BytesLeft = BytesLeftSave;
277 while (BytesLeft) {
278 VT = getRemainingValueType(BytesLeft);
279 VTSize = getRemainingSize(BytesLeft);
280 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
281 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
282 DAG.getConstant(DstOff, dl, MVT::i32)),
283 DstPtrInfo.getWithOffset(DstOff));
284 ++i;
285 DstOff += VTSize;
286 BytesLeft -= VTSize;
287 }
288 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
289}
290
// Emit target-specific code that performs a memmove. No inline expansion is
// attempted: the call is forwarded to EmitSpecializedLibcall with
// RTLIB::MEMMOVE, so the result is whatever that helper returns (possibly an
// empty SDValue). isVolatile, DstPtrInfo and SrcPtrInfo are not used.
// NOTE(review): the function header (source line 291) is missing from this
// listing.
292 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
293 SDValue Size, Align Alignment, bool isVolatile,
294 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
295 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
296 Alignment.value(), RTLIB::MEMMOVE);
297}
298
// Emit target-specific code that performs a memset.
//
// With MVE integer ops, and if shouldGenerateInlineTPLoop agrees, the fill
// value is truncated to i8, splatted into a v16i8 vector and an
// ARMISD::MEMSETLOOP node is emitted. Otherwise, unless AlwaysInline is set,
// the call is forwarded to EmitSpecializedLibcall with RTLIB::MEMSET. When
// AlwaysInline is set and no TP loop was generated, an empty SDValue is
// returned.
//
// NOTE(review): the function header (source line 299) and the definition of
// ConstantSize (source lines 305, 307) are missing from this listing.
// isVolatile and DstPtrInfo are not referenced in the visible body.
300 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
301 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
302 MachinePointerInfo DstPtrInfo) const {
303
304 const ARMSubtarget &Subtarget =
306
308
309 // Generate TP loop for llvm.memset
310 if (Subtarget.hasMVEIntegerOps() &&
311 shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
312 false)) {
313 Src = DAG.getSplatBuildVector(MVT::v16i8, dl,
314 DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src));
315 return DAG.getNode(ARMISD::MEMSETLOOP, dl, MVT::Other, Chain, Dst, Src,
316 DAG.getZExtOrTrunc(Size, dl, MVT::i32));
317 }
318
319 if (!AlwaysInline)
320 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
321 Alignment.value(), RTLIB::MEMSET);
322
323 return SDValue();
324}
return SDValue()
static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget, const SelectionDAG &DAG, ConstantSDNode *ConstantSize, Align Alignment, bool IsMemcpy)
static cl::opt< TPLoop::MemTransfer > EnableMemtransferTPLoop("arm-memtransfer-tploop", cl::Hidden, cl::desc("Control conversion of memcpy to " "Tail predicated loops (WLSTP)"), cl::init(TPLoop::ForceDisabled), cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled", "Don't convert memcpy to TP loop."), clEnumValN(TPLoop::ForceEnabled, "force-enabled", "Always convert memcpy to TP loop."), clEnumValN(TPLoop::Allow, "allow", "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop.")))
This file defines a TargetTransformInfoImplBase conforming object specific to the ARM target machine.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memmove.
bool isTargetMemoryOpcode(unsigned Opcode) const override
Returns true if a node with the given target-specific opcode has a memory operand.
bool isThumb1Only() const
const ARMTargetLowering * getTargetLowering() const override
unsigned getMaxMemcpyTPInlineSizeThreshold() const
getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size that still makes it profitable to inline...
unsigned getMaxInlineSizeThreshold() const
getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size that still makes it profitable t...
bool hasMinSize() const
uint64_t getZExtValue() const
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
std::vector< ArgListEntry > ArgListTy
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
ArrayRef(const T &OneElt) -> ArrayRef< T >
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setChain(SDValue InChain)