LLVM 23.0.0git
X86ISelLoweringCall.cpp
Go to the documentation of this file.
1//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
15#include "X86.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86ISelLowering.h"
19#include "X86InstrBuilder.h"
21#include "X86TargetMachine.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/Module.h"
31
32#define DEBUG_TYPE "x86-isel"
33
34using namespace llvm;
35
36STATISTIC(NumTailCalls, "Number of tail calls");
37
38/// Call this when the user attempts to do something unsupported, like
39/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
40/// report_fatal_error, so calling code should attempt to recover without
41/// crashing.
42static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
43 const char *Msg) {
45 DAG.getContext()->diagnose(
47}
48
49/// Returns true if a CC can dynamically exclude a register from the list of
50/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
51/// the return registers.
53 switch (CC) {
54 default:
55 return false;
59 return true;
60 }
61}
62
63/// Returns true if a CC can dynamically exclude a register from the list of
64/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
65/// the parameters.
69
70static std::pair<MVT, unsigned>
72 const X86Subtarget &Subtarget) {
73 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
74 // convention is one that uses k registers.
75 if (NumElts == 2)
76 return {MVT::v2i64, 1};
77 if (NumElts == 4)
78 return {MVT::v4i32, 1};
79 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
81 return {MVT::v8i16, 1};
82 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
84 return {MVT::v16i8, 1};
85 // v32i1 passes in ymm unless we have BWI and the calling convention is
86 // regcall.
87 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
88 return {MVT::v32i8, 1};
89 // Split v64i1 vectors if we don't have v64i8 available.
90 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
91 if (Subtarget.useAVX512Regs())
92 return {MVT::v64i8, 1};
93 return {MVT::v32i8, 2};
94 }
95
96 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
97 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
98 NumElts > 64)
99 return {MVT::i8, NumElts};
100
102}
103
106 EVT VT) const {
107 if (VT.isVector()) {
108 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
109 unsigned NumElts = VT.getVectorNumElements();
110
111 MVT RegisterVT;
112 unsigned NumRegisters;
113 std::tie(RegisterVT, NumRegisters) =
114 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
115 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
116 return RegisterVT;
117 }
118
119 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
120 return MVT::v8f16;
121 }
122
123 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
124 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
125 !Subtarget.hasX87())
126 return MVT::i32;
127
128 if (isTypeLegal(MVT::f16)) {
129 if (VT.isVectorOf(MVT::bf16))
131 Context, CC, VT.changeVectorElementType(Context, MVT::f16));
132
133 if (VT == MVT::bf16)
134 return MVT::f16;
135 }
136
137 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
138}
139
142 EVT VT) const {
143 if (VT.isVector()) {
144 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
145 unsigned NumElts = VT.getVectorNumElements();
146
147 MVT RegisterVT;
148 unsigned NumRegisters;
149 std::tie(RegisterVT, NumRegisters) =
150 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
151 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
152 return NumRegisters;
153 }
154
155 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
156 return 1;
157 }
158
159 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
160 // x87 is disabled.
161 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
162 if (VT == MVT::f64)
163 return 2;
164 if (VT == MVT::f80)
165 return 3;
166 }
167
168 if (VT.isVectorOf(MVT::bf16) && isTypeLegal(MVT::f16))
170 Context, CC, VT.changeVectorElementType(Context, MVT::f16));
171
172 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
173}
174
176 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
177 unsigned &NumIntermediates, MVT &RegisterVT) const {
178 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
179 if (VT.isVectorOf(MVT::i1) && Subtarget.hasAVX512() &&
181 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
182 VT.getVectorNumElements() > 64)) {
183 RegisterVT = MVT::i8;
184 IntermediateVT = MVT::i1;
185 NumIntermediates = VT.getVectorNumElements();
186 return NumIntermediates;
187 }
188
189 // Split v64i1 vectors if we don't have v64i8 available.
190 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
192 RegisterVT = MVT::v32i8;
193 IntermediateVT = MVT::v32i1;
194 NumIntermediates = 2;
195 return 2;
196 }
197
198 // Split vNbf16 vectors according to vNf16.
199 if (VT.isVectorOf(MVT::bf16) && isTypeLegal(MVT::f16))
200 VT = VT.changeVectorElementType(Context, MVT::f16);
201
202 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
203 NumIntermediates, RegisterVT);
204}
205
207 LLVMContext& Context,
208 EVT VT) const {
209 if (!VT.isVector())
210 return MVT::i8;
211
212 if (Subtarget.hasAVX512()) {
213 // Figure out what this type will be legalized to.
214 EVT LegalVT = VT;
215 while (getTypeAction(Context, LegalVT) != TypeLegal)
216 LegalVT = getTypeToTransformTo(Context, LegalVT);
217
218 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
219 if (LegalVT.getSimpleVT().is512BitVector())
220 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
221
222 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
223 // If we legalized to less than a 512-bit vector, then we will use a vXi1
224 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
225 // vXi16/vXi8.
226 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
227 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
228 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
229 }
230 }
231
233}
234
236 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
237 const DataLayout &DL) const {
238 // On x86-64 i128 is split into two i64s and needs to be allocated to two
239 // consecutive registers, or spilled to the stack as a whole. On x86-32 i128
240 // is split to four i32s and never actually passed in registers, but we use
241 // the consecutive register mark to match it in TableGen.
242 if (Ty->isIntegerTy(128))
243 return true;
244
245 // On x86-32, fp128 acts the same as i128.
246 if (Subtarget.is32Bit() && Ty->isFP128Ty())
247 return true;
248
249 return false;
250}
251
252/// Helper for getByValTypeAlignment to determine
253/// the desired ByVal argument alignment.
254static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
255 if (MaxAlign == 16)
256 return;
257 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
258 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
259 MaxAlign = Align(16);
260 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
261 Align EltAlign;
262 getMaxByValAlign(ATy->getElementType(), EltAlign);
263 if (EltAlign > MaxAlign)
264 MaxAlign = EltAlign;
265 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
266 for (auto *EltTy : STy->elements()) {
267 Align EltAlign;
268 getMaxByValAlign(EltTy, EltAlign);
269 if (EltAlign > MaxAlign)
270 MaxAlign = EltAlign;
271 if (MaxAlign == 16)
272 break;
273 }
274 }
275}
276
277/// Return the desired alignment for ByVal aggregate
278/// function arguments in the caller parameter area. For X86, aggregates
279/// that contain SSE vectors are placed at 16-byte boundaries while the rest
280/// are at 4-byte boundaries.
282 const DataLayout &DL) const {
283 if (Subtarget.is64Bit())
284 return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
285
286 Align Alignment(4);
287 if (Subtarget.hasSSE1())
288 getMaxByValAlign(Ty, Alignment);
289 return Alignment;
290}
291
292/// It returns MVT::Other if the type should be determined using generic
293/// target-independent logic.
294/// For vector ops we check that the overall size isn't larger than our
295/// preferred vector width.
297 LLVMContext &Context, const MemOp &Op,
298 const AttributeList &FuncAttributes) const {
299 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
300 if (Op.size() >= 16 &&
301 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
302 // FIXME: Check if unaligned 64-byte accesses are slow.
303 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
304 (Subtarget.getPreferVectorWidth() >= 512)) {
305 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
306 }
307 // FIXME: Check if unaligned 32-byte accesses are slow.
308 if (Op.size() >= 32 && Subtarget.hasAVX() &&
309 Subtarget.useLight256BitInstructions()) {
310 // Although this isn't a well-supported type for AVX1, we'll let
311 // legalization and shuffle lowering produce the optimal codegen. If we
312 // choose an optimal type with a vector element larger than a byte,
313 // getMemsetStores() may create an intermediate splat (using an integer
314 // multiply) before we splat as a vector.
315 return MVT::v32i8;
316 }
317 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
318 return MVT::v16i8;
319 // TODO: Can SSE1 handle a byte vector?
320 // If we have SSE1 registers we should be able to use them.
321 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
322 (Subtarget.getPreferVectorWidth() >= 128))
323 return MVT::v4f32;
324 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
325 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
326 // Do not use f64 to lower memcpy if source is string constant. It's
327 // better to use i32 to avoid the loads.
328 // Also, do not use f64 to lower memset unless this is a memset of zeros.
329 // The gymnastics of splatting a byte value into an XMM register and then
330 // only using 8-byte stores (because this is a CPU with slow unaligned
331 // 16-byte accesses) makes that a loser.
332 return MVT::f64;
333 }
334 }
335 // This is a compromise. If we reach here, unaligned accesses may be slow on
336 // this target. However, creating smaller, aligned accesses could be even
337 // slower and would certainly be a lot more code.
338 if (Subtarget.is64Bit() && Op.size() >= 8)
339 return MVT::i64;
340 return MVT::i32;
341}
342
344 if (VT == MVT::f32)
345 return Subtarget.hasSSE1();
346 if (VT == MVT::f64)
347 return Subtarget.hasSSE2();
348 return true;
349}
350
351static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
352 return (8 * Alignment.value()) % SizeInBits == 0;
353}
354
356 if (isBitAligned(Alignment, VT.getSizeInBits()))
357 return true;
358 switch (VT.getSizeInBits()) {
359 default:
360 // 8-byte and under are always assumed to be fast.
361 return true;
362 case 128:
363 return !Subtarget.isUnalignedMem16Slow();
364 case 256:
365 return !Subtarget.isUnalignedMem32Slow();
366 // TODO: What about AVX-512 (512-bit) accesses?
367 }
368}
369
371 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
372 unsigned *Fast) const {
373 if (Fast)
374 *Fast = isMemoryAccessFast(VT, Alignment);
375 // NonTemporal vector memory ops must be aligned.
376 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
377 // NT loads can only be vector aligned, so if it's less aligned than the
378 // minimum vector size (which we can split the vector down to), we might as
379 // well use a regular unaligned vector load.
380 // We don't have any NT loads pre-SSE41.
381 if (!!(Flags & MachineMemOperand::MOLoad))
382 return (Alignment < 16 || !Subtarget.hasSSE41());
383 return false;
384 }
385 // Misaligned accesses of any size are always allowed.
386 return true;
387}
388
390 const DataLayout &DL, EVT VT,
391 unsigned AddrSpace, Align Alignment,
393 unsigned *Fast) const {
394 if (Fast)
395 *Fast = isMemoryAccessFast(VT, Alignment);
396 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
397 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
398 /*Fast=*/nullptr))
399 return true;
400 // NonTemporal vector memory ops are special, and must be aligned.
401 if (!isBitAligned(Alignment, VT.getSizeInBits()))
402 return false;
403 switch (VT.getSizeInBits()) {
404 case 128:
405 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
406 return true;
407 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
408 return true;
409 return false;
410 case 256:
411 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
412 return true;
413 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
414 return true;
415 return false;
416 case 512:
417 if (Subtarget.hasAVX512())
418 return true;
419 return false;
420 default:
421 return false; // Don't have NonTemporal vector memory ops of this size.
422 }
423 }
424 return true;
425}
426
427/// Return the entry encoding for a jump table in the
428/// current function. The returned value is a member of the
429/// MachineJumpTableInfo::JTEntryKind enum.
431 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
432 // symbol.
433 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
435 if (isPositionIndependent() &&
437 !Subtarget.isTargetCOFF())
439
440 // Otherwise, use the normal jump table encoding heuristics.
442}
443
445 return Subtarget.useSoftFloat();
446}
447
449 ArgListTy &Args) const {
450
451 // Only relabel X86-32 for C / Stdcall CCs.
452 if (Subtarget.is64Bit())
453 return;
454 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
455 return;
456 unsigned ParamRegs = 0;
457 if (auto *M = MF->getFunction().getParent())
458 ParamRegs = M->getNumberRegisterParameters();
459
460 // Mark the first N int arguments as having reg
461 for (auto &Arg : Args) {
462 Type *T = Arg.Ty;
463 if (T->isIntOrPtrTy())
464 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
465 unsigned numRegs = 1;
466 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
467 numRegs = 2;
468 if (ParamRegs < numRegs)
469 return;
470 ParamRegs -= numRegs;
471 Arg.IsInReg = true;
472 }
473 }
474}
475
476const MCExpr *
478 const MachineBasicBlock *MBB,
479 unsigned uid,MCContext &Ctx) const{
480 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
481 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
482 // entries.
483 return MCSymbolRefExpr::create(MBB->getSymbol(), X86::S_GOTOFF, Ctx);
484}
485
486/// Returns relocation base for the given PIC jumptable.
488 SelectionDAG &DAG) const {
489 if (!Subtarget.is64Bit())
490 // This doesn't have SDLoc associated with it, but is not really the
491 // same as a Register.
492 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
494 return Table;
495}
496
497/// This returns the relocation base for the given PIC jumptable,
498/// the same as getPICJumpTableRelocBase, but as an MCExpr.
500getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
501 MCContext &Ctx) const {
502 // X86-64 uses RIP relative addressing based on the jump table label.
503 if (Subtarget.isPICStyleRIPRel() ||
504 (Subtarget.is64Bit() &&
507
508 // Otherwise, the reference is relative to the PIC base.
509 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
510}
511
512std::pair<const TargetRegisterClass *, uint8_t>
514 MVT VT) const {
515 const TargetRegisterClass *RRC = nullptr;
516 uint8_t Cost = 1;
517 switch (VT.SimpleTy) {
518 default:
520 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
521 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
522 break;
523 case MVT::x86mmx:
524 RRC = &X86::VR64RegClass;
525 break;
526 case MVT::f32: case MVT::f64:
527 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
528 case MVT::v4f32: case MVT::v2f64:
529 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
530 case MVT::v8f32: case MVT::v4f64:
531 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
532 case MVT::v16f32: case MVT::v8f64:
533 RRC = &X86::VR128XRegClass;
534 break;
535 }
536 return std::make_pair(RRC, Cost);
537}
538
539unsigned X86TargetLowering::getAddressSpace() const {
540 if (Subtarget.is64Bit())
542 : X86AS::FS;
543 return X86AS::GS;
544}
545
546static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
547 return TargetTriple.isOSGlibc() || TargetTriple.isMusl() ||
548 TargetTriple.isOSFuchsia() || TargetTriple.isAndroid();
549}
550
557
558Value *
560 const LibcallLoweringInfo &Libcalls) const {
561 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
562 // tcbhead_t; use it instead of the usual global variable (see
563 // sysdeps/{i386,x86_64}/nptl/tls.h)
564 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
565 unsigned AddressSpace = getAddressSpace();
566
567 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
568 if (Subtarget.isTargetFuchsia())
569 return SegmentOffset(IRB, 0x10, AddressSpace);
570
571 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
572 // Specially, some users may customize the base reg and offset.
573 int Offset = M->getStackProtectorGuardOffset();
574 // If we don't set -stack-protector-guard-offset value:
575 // %fs:0x28, unless we're using a Kernel code model, in which case
576 // it's %gs:0x28. gs:0x14 on i386.
577 if (Offset == INT_MAX)
578 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
579
580 StringRef GuardReg = M->getStackProtectorGuardReg();
581 if (GuardReg == "fs")
583 else if (GuardReg == "gs")
585
586 // Use the symbol guard if the user specified one.
587 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
588 if (!GuardSymb.empty()) {
589 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
590 if (!GV) {
591 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
592 : Type::getInt32Ty(M->getContext());
593 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
594 nullptr, GuardSymb, nullptr,
596 if (!Subtarget.isTargetDarwin())
597 GV->setDSOLocal(M->getDirectAccessExternalData());
598 }
599 return GV;
600 }
601
602 return SegmentOffset(IRB, Offset, AddressSpace);
603 }
604 return TargetLowering::getIRStackGuard(IRB, Libcalls);
605}
606
608 Module &M, const LibcallLoweringInfo &Libcalls) const {
609 // MSVC CRT provides functionalities for stack protection.
610 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
611 Libcalls.getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
612
613 RTLIB::LibcallImpl SecurityCookieVar =
614 Libcalls.getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
615 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
616 SecurityCookieVar != RTLIB::Unsupported) {
617 // MSVC CRT provides functionalities for stack protection.
618 // MSVC CRT has a global variable holding security cookie.
619 M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
620 PointerType::getUnqual(M.getContext()));
621
622 // MSVC CRT has a function to validate security cookie.
623 FunctionCallee SecurityCheckCookie =
624 M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
625 Type::getVoidTy(M.getContext()),
626 PointerType::getUnqual(M.getContext()));
627
628 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
629 F->setCallingConv(CallingConv::X86_FastCall);
630 F->addParamAttr(0, Attribute::AttrKind::InReg);
631 }
632 return;
633 }
634
635 StringRef GuardMode = M.getStackProtectorGuard();
636
637 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
638 if ((GuardMode == "tls" || GuardMode.empty()) &&
639 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
640 return;
642}
643
645 IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const {
646 // Android provides a fixed TLS slot for the SafeStack pointer. See the
647 // definition of TLS_SLOT_SAFESTACK in
648 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
649 if (Subtarget.isTargetAndroid()) {
650 // %fs:0x48, unless we're using a Kernel code model, in which case it's
651 // %gs:0x48. %gs:0x24 on i386.
652 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
653 return SegmentOffset(IRB, Offset, getAddressSpace());
654 }
655
656 // Fuchsia is similar.
657 if (Subtarget.isTargetFuchsia()) {
658 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
659 return SegmentOffset(IRB, 0x18, getAddressSpace());
660 }
661
663}
664
665//===----------------------------------------------------------------------===//
666// Return Value Calling Convention Implementation
667//===----------------------------------------------------------------------===//
668
669bool X86TargetLowering::CanLowerReturn(
670 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
671 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
672 const Type *RetTy) const {
673 // Mingw64 GCC returns f128 via sret, and LLVM matches it for compatibility.
674 // This logic exists for libcalls, a frontend should explicitly use sret
675 // rather than rely on the sret demotion here.
676 //
677 // Using sret is a reasonable implementation of the Windows x64 calling
678 // convention:
679 //
680 // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values
681 //
682 // > Otherwise, the caller must allocate memory for the return value and pass
683 // > a pointer to it as the first argument.
684 //
685 // Although it is not the only reasonable interpretation:
686 //
687 // > Nonscalar types including floats, doubles, and vector types such as
688 // > __m128, __m128i, __m128d are returned in XMM0.
689 //
690 // For now, we prefer compatibility with GCC. If official guidelines are ever
691 // published, this can be revisited.
692 //
693 // Return false, which will perform sret demotion.
694 auto IsWin64F128StackCC = [this](CallingConv::ID CC) -> bool {
695 switch (CC) {
697 return true;
698 case CallingConv::C:
699 return Subtarget.isOSWindowsOrUEFI();
700 default:
701 return false;
702 }
703 };
704
705 if (IsWin64F128StackCC(CallConv) &&
707 Outs, [](const ISD::OutputArg &Out) { return Out.VT == MVT::f128; }))
708 return false;
709
711 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
712 return CCInfo.CheckReturn(Outs, RetCC_X86);
713}
714
715const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
716 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
717 return ScratchRegs;
718}
719
721 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
722 return RCRegs;
723}
724
725/// Lowers masks values (v*i1) to the local register values
726/// \returns DAG node after lowering to register type
727static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
728 const SDLoc &DL, SelectionDAG &DAG) {
729 EVT ValVT = ValArg.getValueType();
730
731 if (ValVT == MVT::v1i1)
732 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
733 DAG.getIntPtrConstant(0, DL));
734
735 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
736 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
737 // Two stage lowering might be required
738 // bitcast: v8i1 -> i8 / v16i1 -> i16
739 // anyextend: i8 -> i32 / i16 -> i32
740 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
741 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
742 if (ValLoc == MVT::i32)
743 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
744 return ValToCopy;
745 }
746
747 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
748 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
749 // One stage lowering is required
750 // bitcast: v32i1 -> i32 / v64i1 -> i64
751 return DAG.getBitcast(ValLoc, ValArg);
752 }
753
754 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
755}
756
757/// Breaks v64i1 value into two registers and adds the new node to the DAG
759 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
760 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
761 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
762 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
763 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
764 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
765 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
766 "The value should reside in two registers");
767
768 // Before splitting the value we cast it to i64
769 Arg = DAG.getBitcast(MVT::i64, Arg);
770
771 // Splitting the value into two i32 types
772 SDValue Lo, Hi;
773 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
774
775 // Attach the two i32 types into corresponding registers
776 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
777 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
778}
779
781X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
782 bool isVarArg,
784 const SmallVectorImpl<SDValue> &OutVals,
785 const SDLoc &dl, SelectionDAG &DAG) const {
786 MachineFunction &MF = DAG.getMachineFunction();
787 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
788
789 // In some cases we need to disable registers from the default CSR list.
790 // For example, when they are used as return registers (preserve_* and X86's
791 // regcall) or for argument passing (X86's regcall).
792 bool ShouldDisableCalleeSavedRegister =
793 shouldDisableRetRegFromCSR(CallConv) ||
794 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
795
796 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
797 report_fatal_error("X86 interrupts may not return any value");
798
800 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
801 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
802
804 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
805 ++I, ++OutsIndex) {
806 CCValAssign &VA = RVLocs[I];
807 assert(VA.isRegLoc() && "Can only return in registers!");
808
809 // Add the register to the CalleeSaveDisableRegs list.
810 if (ShouldDisableCalleeSavedRegister)
812
813 SDValue ValToCopy = OutVals[OutsIndex];
814 EVT ValVT = ValToCopy.getValueType();
815
816 // Promote values to the appropriate types.
817 if (VA.getLocInfo() == CCValAssign::SExt)
818 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
819 else if (VA.getLocInfo() == CCValAssign::ZExt)
820 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
821 else if (VA.getLocInfo() == CCValAssign::AExt) {
822 if (ValVT.isVectorOf(MVT::i1))
823 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
824 else
825 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
826 }
827 else if (VA.getLocInfo() == CCValAssign::BCvt)
828 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
829
831 "Unexpected FP-extend for return value.");
832
833 // Report an error if we have attempted to return a value via an XMM
834 // register and SSE was disabled.
835 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
836 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
837 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
838 } else if (!Subtarget.hasSSE2() &&
839 X86::FR64XRegClass.contains(VA.getLocReg()) &&
840 ValVT == MVT::f64) {
841 // When returning a double via an XMM register, report an error if SSE2 is
842 // not enabled.
843 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
844 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
845 }
846
847 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
848 // the RET instruction and handled by the FP Stackifier.
849 if (VA.getLocReg() == X86::FP0 ||
850 VA.getLocReg() == X86::FP1) {
851 // If this is a copy from an xmm register to ST(0), use an FPExtend to
852 // change the value to the FP stack register class.
854 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
855 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
856 // Don't emit a copytoreg.
857 continue;
858 }
859
860 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
861 // which is returned in RAX / RDX.
862 if (Subtarget.is64Bit()) {
863 if (ValVT == MVT::x86mmx) {
864 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
865 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
866 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
867 ValToCopy);
868 // If we don't have SSE2 available, convert to v4f32 so the generated
869 // register is legal.
870 if (!Subtarget.hasSSE2())
871 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
872 }
873 }
874 }
875
876 if (VA.needsCustom()) {
877 assert(VA.getValVT() == MVT::v64i1 &&
878 "Currently the only custom case is when we split v64i1 to 2 regs");
879
880 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
881 Subtarget);
882
883 // Add the second register to the CalleeSaveDisableRegs list.
884 if (ShouldDisableCalleeSavedRegister)
885 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
886 } else {
887 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
888 }
889 }
890
891 SDValue Glue;
893 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
894 // Operand #1 = Bytes To Pop
895 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
896 MVT::i32));
897
898 // Copy the result values into the output registers.
899 for (auto &RetVal : RetVals) {
900 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
901 RetOps.push_back(RetVal.second);
902 continue; // Don't emit a copytoreg.
903 }
904
905 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
906 Glue = Chain.getValue(1);
907 RetOps.push_back(
908 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
909 }
910
911 // Swift calling convention does not require we copy the sret argument
912 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
913
914 // All x86 ABIs require that for returning structs by value we copy
915 // the sret argument into %rax/%eax (depending on ABI) for the return.
916 // We saved the argument into a virtual register in the entry block,
917 // so now we copy the value out and into %rax/%eax.
918 //
919 // Checking Function.hasStructRetAttr() here is insufficient because the IR
920 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
921 // false, then an sret argument may be implicitly inserted in the SelDAG. In
922 // either case FuncInfo->setSRetReturnReg() will have been called.
923 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
924 // When we have both sret and another return value, we should use the
925 // original Chain stored in RetOps[0], instead of the current Chain updated
926 // in the above loop. If we only have sret, RetOps[0] equals Chain.
927
928 // For the case of sret and another return value, we have
929 // Chain_0 at the function entry
930 // Chain_1 = getCopyToReg(Chain_0) in the above loop
931 // If we use Chain_1 in getCopyFromReg, we will have
932 // Val = getCopyFromReg(Chain_1)
933 // Chain_2 = getCopyToReg(Chain_1, Val) from below
934
935 // getCopyToReg(Chain_0) will be glued together with
936 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
937 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
938 // Data dependency from Unit B to Unit A due to usage of Val in
939 // getCopyToReg(Chain_1, Val)
940 // Chain dependency from Unit A to Unit B
941
942 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
943 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
945
946 Register RetValReg
947 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
948 X86::RAX : X86::EAX;
949 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
950 Glue = Chain.getValue(1);
951
952 // RAX/EAX now acts like a return value.
953 RetOps.push_back(
954 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
955
956 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
957 // this however for preserve_most/preserve_all to minimize the number of
958 // callee-saved registers for these CCs.
959 if (ShouldDisableCalleeSavedRegister &&
960 CallConv != CallingConv::PreserveAll &&
961 CallConv != CallingConv::PreserveMost)
963 }
964
965 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
966 const MCPhysReg *I =
967 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
968 if (I) {
969 for (; *I; ++I) {
970 if (X86::GR64RegClass.contains(*I))
971 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
972 else
973 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
974 }
975 }
976
977 RetOps[0] = Chain; // Update chain.
978
979 // Add the glue if we have it.
980 if (Glue.getNode())
981 RetOps.push_back(Glue);
982
983 unsigned RetOpcode = X86ISD::RET_GLUE;
984 if (CallConv == CallingConv::X86_INTR)
985 RetOpcode = X86ISD::IRET;
986 return DAG.getNode(RetOpcode, dl, MVT::Other, RetOps);
987}
988
989bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
990 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
991 return false;
992
993 SDValue TCChain = Chain;
994 SDNode *Copy = *N->user_begin();
995 if (Copy->getOpcode() == ISD::CopyToReg) {
996 // If the copy has a glue operand, we conservatively assume it isn't safe to
997 // perform a tail call.
998 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
999 return false;
1000 TCChain = Copy->getOperand(0);
1001 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
1002 return false;
1003
1004 bool HasRet = false;
1005 for (const SDNode *U : Copy->users()) {
1006 if (U->getOpcode() != X86ISD::RET_GLUE)
1007 return false;
1008 // If we are returning more than one value, we can definitely
1009 // not make a tail call see PR19530
1010 if (U->getNumOperands() > 4)
1011 return false;
1012 if (U->getNumOperands() == 4 &&
1013 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
1014 return false;
1015 HasRet = true;
1016 }
1017
1018 if (!HasRet)
1019 return false;
1020
1021 Chain = TCChain;
1022 return true;
1023}
1024
1025EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
1026 ISD::NodeType ExtendKind) const {
1027 MVT ReturnMVT = MVT::i32;
1028
1029 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
1030 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
1031 // The ABI does not require i1, i8 or i16 to be extended.
1032 //
1033 // On Darwin, there is code in the wild relying on Clang's old behaviour of
1034 // always extending i8/i16 return values, so keep doing that for now.
1035 // (PR26665).
1036 ReturnMVT = MVT::i8;
1037 }
1038
1039 EVT MinVT = getRegisterType(Context, ReturnMVT);
1040 return VT.bitsLT(MinVT) ? MinVT : VT;
1041}
1042
1043/// Reads two 32 bit registers and creates a 64 bit mask value.
1044/// \param VA The current 32 bit value that need to be assigned.
1045/// \param NextVA The next 32 bit value that need to be assigned.
1046/// \param Root The parent DAG node.
1047/// \param [in,out] InGlue Represents SDvalue in the parent DAG node for
1048/// glue purposes. In the case the DAG is already using
1049/// physical register instead of virtual, we should glue
1050/// our new SDValue to InGlue SDvalue.
1051/// \return a new SDvalue of size 64bit.
// NOTE(review): listing line 1052 (the line carrying the function name and the
// leading parameters) is absent from this extract. Call sites in this file
// invoke it as getv64i1Argument(VA, NextVA, Chain, DAG, dl, Subtarget[, &InGlue])
// -- confirm the exact signature against upstream.
1053 SDValue &Root, SelectionDAG &DAG,
1054 const SDLoc &DL, const X86Subtarget &Subtarget,
1055 SDValue *InGlue = nullptr) {
// Preconditions: BWI available, 32-bit target, both locations are v64i1 values
// that live in registers.
1056 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1057 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1058 assert(VA.getValVT() == MVT::v64i1 &&
1059 "Expecting first location of 64 bit width type");
1060 assert(NextVA.getValVT() == VA.getValVT() &&
1061 "The locations should have the same type");
1062 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1063 "The values should reside in two registers");
1064
1065 SDValue Lo, Hi;
1066 SDValue ArgValueLo, ArgValueHi;
1067
// NOTE(review): listing line 1068 is absent from this extract; `MF`, used
// below, is presumably declared there -- TODO confirm.
1069 const TargetRegisterClass *RC = &X86::GR32RegClass;
1070
1071 // Read a 32 bit value from the registers.
1072 if (nullptr == InGlue) {
1073 // When no physical register is present,
1074 // create an intermediate virtual register.
1075 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1076 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1077 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1078 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1079 } else {
1080 // When a physical register is available read the value from it and glue
1081 // the reads together.
// *InGlue is advanced to result #2 of each copy, so the second read is glued
// to the first and the caller sees the glue of the last read.
1082 ArgValueLo =
1083 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1084 *InGlue = ArgValueLo.getValue(2);
1085 ArgValueHi =
1086 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1087 *InGlue = ArgValueHi.getValue(2);
1088 }
1089
1090 // Convert the i32 type into v32i1 type.
1091 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1092
1093 // Convert the i32 type into v32i1 type.
1094 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1095
1096 // Concatenate the two values together.
// Lo (read from VA) is the first CONCAT_VECTORS operand, Hi (from NextVA) the
// second.
1097 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1098}
1099
1100/// The function will lower a register of various sizes (8/16/32/64)
1101/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1102/// \returns a DAG node contains the operand after lowering to mask type.
1103static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1104 const EVT &ValLoc, const SDLoc &DL,
1105 SelectionDAG &DAG) {
1106 SDValue ValReturned = ValArg;
1107
1108 if (ValVT == MVT::v1i1)
1109 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1110
1111 if (ValVT == MVT::v64i1) {
1112 // In 32 bit machine, this case is handled by getv64i1Argument
1113 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1114 // In 64 bit machine, There is no need to truncate the value only bitcast
1115 } else {
1116 MVT MaskLenVT;
1117 switch (ValVT.getSimpleVT().SimpleTy) {
1118 case MVT::v8i1:
1119 MaskLenVT = MVT::i8;
1120 break;
1121 case MVT::v16i1:
1122 MaskLenVT = MVT::i16;
1123 break;
1124 case MVT::v32i1:
1125 MaskLenVT = MVT::i32;
1126 break;
1127 default:
1128 llvm_unreachable("Expecting a vector of i1 types");
1129 }
1130
1131 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1132 }
1133 return DAG.getBitcast(ValVT, ValReturned);
1134}
1135
// Builds an X86ISD::POP_FROM_X87_REG node that yields (VT, chain, glue) for
// register Reg. The Glue operand is passed only when it holds a node.
// NOTE(review): listing line 1136 (function name and leading parameters) is
// absent from this extract. The call site in this file invokes it as
// getPopFromX87Reg(DAG, Chain, dl, Reg, VT, Glue) -- confirm against upstream.
1137 const SDLoc &dl, Register Reg, EVT VT,
1138 SDValue Glue) {
1139 SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue);
1140 SDValue Ops[] = {Chain, DAG.getRegister(Reg, VT), Glue};
// Use all three operands when Glue is set, otherwise only the first two.
1141 return DAG.getNode(X86ISD::POP_FROM_X87_REG, dl, VTs,
1142 ArrayRef(Ops, Glue.getNode() ? 3 : 2));
1143}
1144
1145/// Lower the result values of a call into the
1146/// appropriate copies out of appropriate physical registers.
1147///
// Each lowered result is appended to InVals; the updated chain is returned.
// When RegMask is non-null, the bits for every returned register (and its
// sub-registers) are cleared in it.
1148SDValue X86TargetLowering::LowerCallResult(
1149 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1150 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
// NOTE(review): listing line 1151 is absent from this extract; the `DAG` and
// `InVals` parameters used below are presumably declared there -- TODO confirm.
1152 uint32_t *RegMask) const {
1153
1154 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1155 // Assign locations to each value returned by this call.
// NOTE(review): listing line 1156 is absent from this extract; `RVLocs` is
// presumably declared there -- TODO confirm.
1157 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1158 *DAG.getContext());
1159 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1160
1161 // Copy all of the result registers out of their specified physreg.
// InsIndex is advanced alongside I but is not otherwise read in this loop.
1162 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1163 ++I, ++InsIndex) {
1164 CCValAssign &VA = RVLocs[I];
1165 EVT CopyVT = VA.getLocVT();
1166
1167 // In some calling conventions we need to remove the used registers
1168 // from the register mask.
1169 if (RegMask) {
1170 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1171 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1172 }
1173
1174 // Report an error if there was an attempt to return FP values via XMM
1175 // registers.
// After the diagnostic the location is redirected to FP1 (for XMM1) or FP0
// (any other register) so lowering can continue.
1176 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1177 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1178 if (VA.getLocReg() == X86::XMM1)
1179 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1180 else
1181 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1182 } else if (!Subtarget.hasSSE2() &&
1183 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1184 CopyVT == MVT::f64) {
1185 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1186 if (VA.getLocReg() == X86::XMM1)
1187 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1188 else
1189 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1190 }
1191
1192 // If we prefer to use the value in xmm registers, copy it out as f80 and
1193 // use a truncate to move it from fp stack reg to xmm reg.
1194 bool RoundAfterCopy = false;
1195 bool X87Result = VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1;
1196 if (X87Result && isScalarFPTypeInSSEReg(VA.getValVT())) {
1197 if (!Subtarget.hasX87())
1198 report_fatal_error("X87 register return with X87 disabled");
1199 CopyVT = MVT::f80;
1200 RoundAfterCopy = (CopyVT != VA.getLocVT());
1201 }
1202
1203 SDValue Val;
1204 if (VA.needsCustom()) {
1205 assert(VA.getValVT() == MVT::v64i1 &&
1206 "Currently the only custom case is when we split v64i1 to 2 regs");
// The split v64i1 value consumes two locations, hence the ++I here.
1207 Val =
1208 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1209 } else {
// Chain is set to result #1 of the copy/pop node; results #0 and #2 of that
// same node are then taken as the value and the new glue.
1210 Chain =
1211 X87Result
1212 ? getPopFromX87Reg(DAG, Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1213 .getValue(1)
1214 : DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1215 .getValue(1);
1216 Val = Chain.getValue(0);
1217 InGlue = Chain.getValue(2);
1218 }
1219
1220 if (RoundAfterCopy)
1221 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1222 // This truncation won't change the value.
1223 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1224
1225 if (VA.isExtInLoc()) {
1226 if (VA.getValVT().isVector() &&
1227 VA.getValVT().getScalarType() == MVT::i1 &&
1228 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1229 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1230 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1231 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1232 } else
1233 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1234 }
1235
1236 if (VA.getLocInfo() == CCValAssign::BCvt)
1237 Val = DAG.getBitcast(VA.getValVT(), Val);
1238
1239 InVals.push_back(Val);
1240 }
1241
1242 return Chain;
1243}
1244
1245/// Determines whether Args, either a set of outgoing arguments to a call, or a
1246/// set of incoming args of a call, contains an sret pointer that the callee
1247/// pops. This happens on most x86-32, System V platforms, unless register
1248/// parameters are in use (-mregparm=1+, regcallcc, etc).
1249template <typename T>
1250static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1251 const SmallVectorImpl<CCValAssign> &ArgLocs,
1252 const X86Subtarget &Subtarget) {
1253 // Not C++20 (yet), so no concepts available.
1254 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1255 std::is_same_v<T, ISD::InputArg>,
1256 "requires ISD::OutputArg or ISD::InputArg");
1257
1258 // Popping the sret pointer only happens on x86-32 System V ABI platforms
1259 // (Linux, Cygwin, BSDs, Mac, etc). That excludes Windows-minus-Cygwin and
1260 // MCU.
1261 const Triple &TT = Subtarget.getTargetTriple();
1262 if (!TT.isX86_32() || TT.isOSMSVCRT() || TT.isOSIAMCU())
1263 return false;
1264
1265 // Check if the first argument is marked sret and if it is passed in memory.
1266 bool IsSRetInMem = false;
1267 if (!Args.empty())
1268 IsSRetInMem = Args.front().Flags.isSRet() && ArgLocs.front().isMemLoc();
1269 return IsSRetInMem;
1270}
1271
1272/// Make a copy of an aggregate at address specified by "Src" to address
1273/// "Dst" with size and alignment information specified by the specific
1274/// parameter attribute. The copy will be passed as a byval function parameter.
// NOTE(review): listing line 1275 (function name and the Src/Dst parameters)
// is absent from this extract -- confirm the signature against upstream.
1276 SDValue Chain, ISD::ArgFlagsTy Flags,
1277 SelectionDAG &DAG, const SDLoc &dl) {
// Size and alignment both come from the byval flags; the same alignment is
// passed twice to getMemcpy.
1278 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1279 Align Alignment = Flags.getNonZeroByValAlign();
1280 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Alignment, Alignment,
1281 /*isVolatile*/ false, /*AlwaysInline=*/true,
1282 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
// NOTE(review): listing line 1283 (the final getMemcpy argument(s)) is absent
// from this extract.
1284}
1285
1286/// Return true if the calling convention is one that we can guarantee TCO for.
// NOTE(review): listing lines 1287 and 1289-1290 are absent from this extract,
// so the signature and the remainder of the condition are not visible. Only
// the Fast and GHC comparisons can be confirmed from here; other code in this
// file calls this as canGuaranteeTCO(CC).
1288 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1291}
1292
1293/// Return true if we might ever do TCO for calls with this calling convention.
// NOTE(review): listing lines 1294, 1299-1300 and 1302-1305 are absent from
// this extract: the signature and several case labels (the rest of the C
// group and the whole callee-pop group) are not visible. Other code in this
// file calls this as mayTailCallThisCC(CC).
1295 switch (CC) {
1296 // C calling conventions:
1297 case CallingConv::C:
1298 case CallingConv::Win64:
1301 // Callee pop conventions:
1306 // Swift:
1307 case CallingConv::Swift:
1308 return true;
// Anything not listed explicitly is eligible only if TCO can be guaranteed.
1309 default:
1310 return canGuaranteeTCO(CC);
1311 }
1312}
1313
1314/// Return true if the function is being made into a tailcall target by
1315/// changing its ABI.
// \param CC                     The calling convention to test.
// \param GuaranteedTailCallOpt  Whether the guaranteed-tail-call option is on.
1316static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1317 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
// NOTE(review): listing line 1318 (the right-hand side of the `||`) is absent
// from this extract, so the second alternative cannot be documented here.
1319}
1320
1321bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1322 if (!CI->isTailCall())
1323 return false;
1324
1325 CallingConv::ID CalleeCC = CI->getCallingConv();
1326 if (!mayTailCallThisCC(CalleeCC))
1327 return false;
1328
1329 return true;
1330}
1331
// Lowers incoming argument number `i`, which was assigned a stack location
// (VA), into either a frame index (byval) or a load from a fixed stack object.
1332SDValue
1333X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
// NOTE(review): listing line 1334 is absent from this extract; the `Ins`
// parameter used below is presumably declared there -- TODO confirm.
1335 const SDLoc &dl, SelectionDAG &DAG,
1336 const CCValAssign &VA,
1337 MachineFrameInfo &MFI, unsigned i) const {
1338 // Create the nodes corresponding to a load from this parameter slot.
1339 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1340 bool AlwaysUseMutable = shouldGuaranteeTCO(
1341 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1342 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1343 EVT ValVT;
1344 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1345
1346 // If value is passed by pointer we have address passed instead of the value
1347 // itself. No need to extend if the mask value and location share the same
1348 // absolute size.
1349 bool ExtendedInMem =
1350 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
// NOTE(review): listing line 1351 (the last conjunct of ExtendedInMem) is
// absent from this extract.
1352
1353 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1354 ValVT = VA.getLocVT();
1355 else
1356 ValVT = VA.getValVT();
1357
1358 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1359 // changed with more analysis.
1360 // In case of tail call optimization mark all arguments mutable. Since they
1361 // could be overwritten by lowering of arguments in case of a tail call.
1362 if (Flags.isByVal()) {
1363 unsigned Bytes = Flags.getByValSize();
1364 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1365
1366 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1367 // can be improved with deeper analysis.
// A byval argument is returned as the address of its stack object, not as a
// loaded value.
1368 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1369 /*isAliased=*/true);
1370 return DAG.getFrameIndex(FI, PtrVT);
1371 }
1372
1373 EVT ArgVT = Ins[i].ArgVT;
1374
1375 // If this is a vector that has been split into multiple parts, don't elide
1376 // the copy. The layout on the stack may not match the packed in-memory
1377 // layout.
1378 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1379
1380 // This is an argument in memory. We might be able to perform copy elision.
1381 // If the argument is passed directly in memory without any extension, then we
1382 // can perform copy elision. Large vector types, for example, may be passed
1383 // indirectly by pointer.
1384 if (Flags.isCopyElisionCandidate() &&
1385 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1386 !ScalarizedVector) {
1387 SDValue PartAddr;
1388 if (Ins[i].PartOffset == 0) {
1389 // If this is a one-part value or the first part of a multi-part value,
1390 // create a stack object for the entire argument value type and return a
1391 // load from our portion of it. This assumes that if the first part of an
1392 // argument is in memory, the rest will also be in memory.
1393 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1394 /*IsImmutable=*/false);
1395 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1396 return DAG.getLoad(
1397 ValVT, dl, Chain, PartAddr,
// NOTE(review): listing line 1398 (the remaining getLoad argument(s)) is
// absent from this extract.
1399 }
1400
1401 // This is not the first piece of an argument in memory. See if there is
1402 // already a fixed stack object including this offset. If so, assume it
1403 // was created by the PartOffset == 0 branch above and create a load from
1404 // the appropriate offset into it.
1405 int64_t PartBegin = VA.getLocMemOffset();
1406 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
// Scan the fixed objects for one whose [ObjBegin, ObjEnd) range fully
// contains [PartBegin, PartEnd).
1407 int FI = MFI.getObjectIndexBegin();
1408 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1409 int64_t ObjBegin = MFI.getObjectOffset(FI);
1410 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1411 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1412 break;
1413 }
// FI is still a fixed-object index only if the scan above stopped early, i.e.
// a containing object was found.
1414 if (MFI.isFixedObjectIndex(FI)) {
1415 SDValue Addr =
1416 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1417 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1418 return DAG.getLoad(ValVT, dl, Chain, Addr,
// NOTE(review): listing line 1419 (start of the pointer-info argument) is
// absent from this extract.
1420 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1421 }
1422 }
1423
// General case: a fresh fixed object sized for ValVT at the assigned offset.
1424 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1425 VA.getLocMemOffset(), isImmutable);
1426
1427 // Set SExt or ZExt flag.
1428 if (VA.getLocInfo() == CCValAssign::ZExt) {
1429 MFI.setObjectZExt(FI, true);
1430 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1431 MFI.setObjectSExt(FI, true);
1432 }
1433
// On 32-bit Windows MSVC targets the load alignment is pinned to 4 for every
// type except f80; otherwise Alignment is left unset.
1434 MaybeAlign Alignment;
1435 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1436 ValVT != MVT::f80)
1437 Alignment = MaybeAlign(4);
1438 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1439 SDValue Val = DAG.getLoad(
1440 ValVT, dl, Chain, FIN,
// NOTE(review): listing line 1441 (the pointer-info argument) is absent from
// this extract.
1442 Alignment);
// A value that was extended in memory is converted back to its value type:
// SCALAR_TO_VECTOR for vectors, TRUNCATE otherwise.
1443 return ExtendedInMem
1444 ? (VA.getValVT().isVector()
1445 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1446 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1447 : Val;
1448}
1449
1450// FIXME: Get this from tablegen.
// Returns the general-purpose argument registers for a 64-bit target: the four
// Win64 registers when CallConv is a Win64 convention, otherwise the six
// registers of the second table.
// NOTE(review): listing line 1451 (return type, function name and the CallConv
// parameter) is absent from this extract. A caller in this file invokes it as
// get64BitArgumentGPRs(CallConv, Subtarget) and binds the result to
// ArrayRef<MCPhysReg>.
1452 const X86Subtarget &Subtarget) {
1453 assert(Subtarget.is64Bit());
1454
1455 if (Subtarget.isCallingConvWin64(CallConv)) {
1456 static const MCPhysReg GPR64ArgRegsWin64[] = {
1457 X86::RCX, X86::RDX, X86::R8, X86::R9
1458 };
1459 return GPR64ArgRegsWin64;
1460 }
1461
1462 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1463 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1464 };
1465 return GPR64ArgRegs64Bit;
1466}
1467
1468// FIXME: Get this from tablegen.
// Returns the XMM argument registers (XMM0-XMM7) for a 64-bit target, or an
// empty list for Win64 conventions, soft-float, or when SSE1 is unavailable.
// NOTE(review): listing line 1469 (return type, function name and the first
// parameter) is absent from this extract. A caller in this file invokes it as
// get64BitArgumentXMMs(MachineFunction, CallConv, Subtarget).
1470 CallingConv::ID CallConv,
1471 const X86Subtarget &Subtarget) {
1472 assert(Subtarget.is64Bit());
1473 if (Subtarget.isCallingConvWin64(CallConv)) {
1474 // The XMM registers which might contain var arg parameters are shadowed
1475 // in their paired GPR. So we only need to save the GPR to their home
1476 // slots.
1477 // TODO: __vectorcall will change this.
1478 return {};
1479 }
1480
1481 bool isSoftFloat = Subtarget.useSoftFloat();
1482 if (isSoftFloat || !Subtarget.hasSSE1())
1483 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1484 // registers.
1485 return {};
1486
1487 static const MCPhysReg XMMArgRegs64Bit[] = {
1488 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1489 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1490 };
1491 return XMMArgRegs64Bit;
1492}
1493
// Debug-only helper (compiled out when NDEBUG is defined): returns true if
// ArgLocs is ordered by value number.
1494#ifndef NDEBUG
// NOTE(review): listing line 1495 (the signature) is absent from this extract.
// An assertion elsewhere in this file calls it as isSortedByValueNo(ArgLocs).
1496 return llvm::is_sorted(
1497 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1498 return A.getValNo() < B.getValNo();
1499 });
1500}
1501#endif
1502
1503namespace {
1504/// This is a helper class for lowering variable arguments parameters.
1505class VarArgsLoweringHelper {
1506public:
1507 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1508 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1509 CallingConv::ID CallConv, CCState &CCInfo)
1510 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1511 TheMachineFunction(DAG.getMachineFunction()),
1512 TheFunction(TheMachineFunction.getFunction()),
1513 FrameInfo(TheMachineFunction.getFrameInfo()),
1514 FrameLowering(*Subtarget.getFrameLowering()),
1515 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1516 CCInfo(CCInfo) {}
1517
1518 // Lower variable arguments parameters.
1519 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1520
1521private:
1522 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1523
1524 void forwardMustTailParameters(SDValue &Chain);
1525
1526 bool is64Bit() const { return Subtarget.is64Bit(); }
1527 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1528
1529 X86MachineFunctionInfo *FuncInfo;
1530 const SDLoc &DL;
1531 SelectionDAG &DAG;
1532 const X86Subtarget &Subtarget;
1533 MachineFunction &TheMachineFunction;
1534 const Function &TheFunction;
1535 MachineFrameInfo &FrameInfo;
1536 const TargetFrameLowering &FrameLowering;
1537 const TargetLowering &TargLowering;
1538 CallingConv::ID CallConv;
1539 CCState &CCInfo;
1540};
1541} // namespace
1542
// Creates the frame objects used for variable arguments and, on 64-bit
// targets, stores the not-yet-allocated argument registers to the register
// save area. Chain is updated to a TokenFactor of the emitted stores when any
// were created.
1543void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1544 SDValue &Chain, unsigned StackSize) {
1545 // If the function takes variable number of arguments, make a frame index for
1546 // the start of the first vararg value... for expansion of llvm.va_start. We
1547 // can skip this if there are no va_start calls.
1548 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1549 CallConv != CallingConv::X86_ThisCall)) {
1550 FuncInfo->setVarArgsFrameIndex(
1551 FrameInfo.CreateFixedObject(1, StackSize, true));
1552 }
1553
1554 // 64-bit calling conventions support varargs and register parameters, so we
1555 // have to do extra work to spill them in the prologue.
1556 if (is64Bit()) {
1557 // Find the first unallocated argument registers.
1558 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1559 ArrayRef<MCPhysReg> ArgXMMs =
1560 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1561 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1562 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1563
1564 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1565 "SSE register cannot be used when SSE is disabled!");
1566
1567 if (isWin64()) {
1568 // Get to the caller-allocated home save location. Add 8 to account
1569 // for the return address.
1570 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1571 FuncInfo->setRegSaveFrameIndex(
1572 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1573 // Fixup to set vararg frame on shadow area (4 x i64).
1574 if (NumIntRegs < 4)
1575 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1576 } else {
1577 // For X86-64, if there are vararg parameters that are passed via
1578 // registers, then we must store them to their spots on the stack so
1579 // they may be loaded by dereferencing the result of va_next.
// The save area holds all GPR slots (8 bytes each) followed by all XMM slots
// (16 bytes each); the two offsets point at the first unused slot of each.
1580 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1581 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1582 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1583 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1584 }
1585
// NOTE(review): listing line 1586 (the type of LiveGPRs) is absent from this
// extract.
1587 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1588 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1589 // keeping live input value
1590 SDValue ALVal; // if applicable keeps SDValue for %al register
1591
1592 // Gather all the live in physical registers.
1593 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1594 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1595 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1596 }
1597 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1598 if (!AvailableXmms.empty()) {
1599 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1600 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1601 for (MCPhysReg Reg : AvailableXmms) {
1602 // FastRegisterAllocator spills virtual registers at basic
1603 // block boundary. That leads to usages of xmm registers
1604 // outside of check for %al. Pass physical registers to
1605 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1606 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1607 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1608 }
1609 }
1610
1611 // Store the integer parameter registers.
// NOTE(review): listing line 1612 is absent from this extract; `MemOps` is
// presumably declared there -- TODO confirm.
1613 SDValue RSFIN =
1614 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1615 TargLowering.getPointerTy(DAG.getDataLayout()));
1616 unsigned Offset = FuncInfo->getVarArgsGPOffset();
// Each live GPR is stored 8 bytes after the previous one, starting at the GP
// offset; the store is chained on result #1 of the register copy.
1617 for (SDValue Val : LiveGPRs) {
1618 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1619 TargLowering.getPointerTy(DAG.getDataLayout()),
1620 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1621 SDValue Store =
1622 DAG.getStore(Val.getValue(1), DL, Val, FIN,
// NOTE(review): listing line 1623 (start of the pointer-info argument) is
// absent from this extract.
1624 DAG.getMachineFunction(),
1625 FuncInfo->getRegSaveFrameIndex(), Offset));
1626 MemOps.push_back(Store);
1627 Offset += 8;
1628 }
1629
1630 // Now store the XMM (fp + vector) parameter registers.
1631 if (!LiveXMMRegs.empty()) {
// Operand order: chain, %al value, save-area address, FP offset, then the
// XMM registers themselves.
1632 SmallVector<SDValue, 12> SaveXMMOps;
1633 SaveXMMOps.push_back(Chain);
1634 SaveXMMOps.push_back(ALVal);
1635 SaveXMMOps.push_back(RSFIN);
1636 SaveXMMOps.push_back(
1637 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1638 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1639 MachineMemOperand *StoreMMO =
// NOTE(review): listing lines 1640-1641 and 1644 (parts of the memory-operand
// construction) are absent from this extract.
1642 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1643 Offset),
1645 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1646 DL, DAG.getVTList(MVT::Other),
1647 SaveXMMOps, MVT::i8, StoreMMO));
1648 }
1649
1650 if (!MemOps.empty())
1651 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1652 }
1653}
1654
// Records which argument registers must be forwarded to a musttail call and
// copies each of them into a fresh virtual register, threading Chain through
// the copies.
1655void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1656 // Find the largest legal vector type.
1657 MVT VecVT = MVT::Other;
1658 // FIXME: Only some x86_32 calling conventions support AVX512.
1659 if (Subtarget.useAVX512Regs() &&
1660 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1661 CallConv == CallingConv::Intel_OCL_BI)))
1662 VecVT = MVT::v16f32;
1663 else if (Subtarget.hasAVX())
1664 VecVT = MVT::v8f32;
1665 else if (Subtarget.hasSSE2())
1666 VecVT = MVT::v4f32;
1667
1668 // We forward some GPRs and some vector types.
// The vector type is only added when one of the checks above selected one.
1669 SmallVector<MVT, 2> RegParmTypes;
1670 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1671 RegParmTypes.push_back(IntVT);
1672 if (VecVT != MVT::Other)
1673 RegParmTypes.push_back(VecVT);
1674
1675 // Compute the set of forwarded registers. The rest are scratch.
// NOTE(review): listing line 1676 is absent from this extract; the declaration
// of `Forwards`, initialised from the call on the next line, presumably starts
// there -- TODO confirm.
1677 FuncInfo->getForwardedMustTailRegParms();
1678 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1679
1680 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1681 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1682 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1683 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1684 }
1685
1686 // Copy all forwards from physical to virtual registers.
// The value is read from the current FR.VReg, then FR.VReg is replaced by a
// newly created virtual register that receives the copy.
1687 for (ForwardedRegister &FR : Forwards) {
1688 // FIXME: Can we use a less constrained schedule?
1689 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1690 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1691 TargLowering.getRegClassFor(FR.VT));
1692 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1693 }
1694}
1695
1696void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1697 unsigned StackSize) {
1698 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
1699 // If necessary, it would be set into the correct value later.
1700 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1701 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1702
1703 if (FrameInfo.hasVAStart())
1704 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1705
1706 if (FrameInfo.hasMustTailInVarArgFunc())
1707 forwardMustTailParameters(Chain);
1708}
1709
1710SDValue X86TargetLowering::LowerFormalArguments(
1711 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1712 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1713 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1714 MachineFunction &MF = DAG.getMachineFunction();
1715 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1716
1717 const Function &F = MF.getFunction();
1718 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1719 F.getName() == "main")
1720 FuncInfo->setForceFramePointer(true);
1721
1722 MachineFrameInfo &MFI = MF.getFrameInfo();
1723 bool Is64Bit = Subtarget.is64Bit();
1724 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1725
1726 // On x86_64 with x87 disabled, x86_fp80 cannot be handled: the type would
1727 // need to be returned/passed in x87 registers (FP0/FP1) which are
1728 // unavailable. Emit a clear diagnostic instead of crashing later with
1729 // "Cannot select: build_pair".
1730 if (Is64Bit && !Subtarget.hasX87()) {
1731 if (F.getReturnType()->isX86_FP80Ty() ||
1732 any_of(F.args(), [](const Argument &Arg) {
1733 return Arg.getType()->isX86_FP80Ty();
1734 }))
1736 "cannot use x86_fp80 type with x87 disabled on x86_64 target");
1737 }
1738
1739 assert(
1740 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1741 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1742
1743 // Assign locations to all of the incoming arguments.
1745 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1746
1747 // Allocate shadow area for Win64.
1748 if (IsWin64)
1749 CCInfo.AllocateStack(32, Align(8));
1750
1751 CCInfo.AnalyzeArguments(Ins, CC_X86);
1752
1753 // In vectorcall calling convention a second pass is required for the HVA
1754 // types.
1755 if (CallingConv::X86_VectorCall == CallConv) {
1756 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1757 }
1758
1759 // The next loop assumes that the locations are in the same order of the
1760 // input arguments.
1761 assert(isSortedByValueNo(ArgLocs) &&
1762 "Argument Location list must be sorted before lowering");
1763
1764 SDValue ArgValue;
1765 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1766 ++I, ++InsIndex) {
1767 assert(InsIndex < Ins.size() && "Invalid Ins index");
1768 CCValAssign &VA = ArgLocs[I];
1769
1770 if (VA.isRegLoc()) {
1771 EVT RegVT = VA.getLocVT();
1772 if (VA.needsCustom()) {
1773 assert(
1774 VA.getValVT() == MVT::v64i1 &&
1775 "Currently the only custom case is when we split v64i1 to 2 regs");
1776
1777 // v64i1 values, in regcall calling convention, that are
1778 // compiled to 32 bit arch, are split up into two registers.
1779 ArgValue =
1780 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1781 } else {
1782 const TargetRegisterClass *RC;
1783 if (RegVT == MVT::i8)
1784 RC = &X86::GR8RegClass;
1785 else if (RegVT == MVT::i16)
1786 RC = &X86::GR16RegClass;
1787 else if (RegVT == MVT::i32)
1788 RC = &X86::GR32RegClass;
1789 else if (Is64Bit && RegVT == MVT::i64)
1790 RC = &X86::GR64RegClass;
1791 else if (RegVT == MVT::f16)
1792 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1793 else if (RegVT == MVT::f32)
1794 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1795 else if (RegVT == MVT::f64)
1796 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1797 else if (RegVT == MVT::f80)
1798 RC = &X86::RFP80RegClass;
1799 else if (RegVT == MVT::f128)
1800 RC = &X86::VR128RegClass;
1801 else if (RegVT.is512BitVector())
1802 RC = &X86::VR512RegClass;
1803 else if (RegVT.is256BitVector())
1804 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1805 else if (RegVT.is128BitVector())
1806 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1807 else if (RegVT == MVT::x86mmx)
1808 RC = &X86::VR64RegClass;
1809 else if (RegVT == MVT::v1i1)
1810 RC = &X86::VK1RegClass;
1811 else if (RegVT == MVT::v8i1)
1812 RC = &X86::VK8RegClass;
1813 else if (RegVT == MVT::v16i1)
1814 RC = &X86::VK16RegClass;
1815 else if (RegVT == MVT::v32i1)
1816 RC = &X86::VK32RegClass;
1817 else if (RegVT == MVT::v64i1)
1818 RC = &X86::VK64RegClass;
1819 else
1820 llvm_unreachable("Unknown argument type!");
1821
1822 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1823 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1824 }
1825
1826 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1827 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1828 // right size.
1829 if (VA.getLocInfo() == CCValAssign::SExt)
1830 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1831 DAG.getValueType(VA.getValVT()));
1832 else if (VA.getLocInfo() == CCValAssign::ZExt)
1833 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1834 DAG.getValueType(VA.getValVT()));
1835 else if (VA.getLocInfo() == CCValAssign::BCvt)
1836 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1837
1838 if (VA.isExtInLoc()) {
1839 // Handle MMX values passed in XMM regs.
1840 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1841 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1842 else if (VA.getValVT().isVector() &&
1843 VA.getValVT().getScalarType() == MVT::i1 &&
1844 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1845 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1846 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1847 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1848 } else
1849 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1850 }
1851 } else {
1852 assert(VA.isMemLoc());
1853 ArgValue =
1854 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1855 }
1856
1857 // If value is passed via pointer - do a load.
1858 if (VA.getLocInfo() == CCValAssign::Indirect &&
1859 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1860 ArgValue =
1861 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1862 }
1863
1864 InVals.push_back(ArgValue);
1865 }
1866
1867 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1868 if (Ins[I].Flags.isSwiftAsync()) {
1869 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1870 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1871 X86FI->setHasSwiftAsyncContext(true);
1872 else {
1873 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1874 int FI =
1875 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1876 X86FI->setSwiftAsyncContextFrameIdx(FI);
1877 SDValue St = DAG.getStore(
1878 DAG.getEntryNode(), dl, InVals[I],
1879 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1881 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1882 }
1883 }
1884
1885 // Swift calling convention does not require we copy the sret argument
1886 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1887 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1888 continue;
1889
1890 // All x86 ABIs require that for returning structs by value we copy the
1891 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1892 // the argument into a virtual register so that we can access it from the
1893 // return points.
1894 if (Ins[I].Flags.isSRet()) {
1895 assert(!FuncInfo->getSRetReturnReg() &&
1896 "SRet return has already been set");
1897 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1898 Register Reg =
1900 FuncInfo->setSRetReturnReg(Reg);
1901 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1902 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1903 break;
1904 }
1905 }
1906
1907 unsigned StackSize = CCInfo.getStackSize();
1908 // Align stack specially for tail calls.
1909 if (shouldGuaranteeTCO(CallConv,
1911 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1912
1913 if (IsVarArg)
1914 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1915 .lowerVarArgsParameters(Chain, StackSize);
1916
1917 // Some CCs need callee pop.
1918 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1920 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1921 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1922 // X86 interrupts must pop the error code (and the alignment padding) if
1923 // present.
1924 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1925 } else {
1926 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1927 // If this is an sret function, the return should pop the hidden pointer.
1928 if (hasCalleePopSRet(Ins, ArgLocs, Subtarget))
1929 FuncInfo->setBytesToPopOnReturn(4);
1930 }
1931
1932 if (!Is64Bit) {
1933 // RegSaveFrameIndex is X86-64 only.
1934 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1935 }
1936
1937 FuncInfo->setArgumentStackSize(StackSize);
1938
1939 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1940 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1941 if (Personality == EHPersonality::CoreCLR) {
1942 assert(Is64Bit);
1943 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1944 // that we'd prefer this slot be allocated towards the bottom of the frame
1945 // (i.e. near the stack pointer after allocating the frame). Every
1946 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1947 // offset from the bottom of this and each funclet's frame must be the
1948 // same, so the size of funclets' (mostly empty) frames is dictated by
1949 // how far this slot is from the bottom (since they allocate just enough
1950 // space to accommodate holding this slot at the correct offset).
1951 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1952 EHInfo->PSPSymFrameIdx = PSPSymFI;
1953 }
1954 }
1955
1956 if (shouldDisableArgRegFromCSR(CallConv) ||
1957 F.hasFnAttribute("no_caller_saved_registers")) {
1958 MachineRegisterInfo &MRI = MF.getRegInfo();
1959 for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1960 MRI.disableCalleeSavedRegister(Pair.first);
1961 }
1962
1963 if (CallingConv::PreserveNone == CallConv)
1964 for (const ISD::InputArg &In : Ins) {
1965 if (In.Flags.isSwiftSelf() || In.Flags.isSwiftAsync() ||
1966 In.Flags.isSwiftError()) {
1967 errorUnsupported(DAG, dl,
1968 "Swift attributes can't be used with preserve_none");
1969 break;
1970 }
1971 }
1972
1973 return Chain;
1974}
1975
// Emits the node that places one outgoing call argument in memory. The
// destination address is StackPtr + VA.getLocMemOffset(). When isByVal is set,
// the argument is copied with CreateCopyOfByValArgument (Arg is the source, the
// computed address is the destination); otherwise Arg is written with a plain
// DAG.getStore. The value produced by whichever node was created is returned.
1976SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1977 SDValue Arg, const SDLoc &dl,
1978 SelectionDAG &DAG,
1979 const CCValAssign &VA,
1980 ISD::ArgFlagsTy Flags,
1981 bool isByVal) const {
1982 unsigned LocMemOffset = VA.getLocMemOffset();
  // Build StackPtr + LocMemOffset as a pointer-typed ADD node.
1983 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1984 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1985 StackPtr, PtrOff);
1986 if (isByVal)
1987 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1988
  // Alignment stays unset unless the target is 32-bit Windows MSVC and the
  // argument is not f80; in that case the store is marked 4-byte aligned.
  // NOTE(review): presumably because only 4-byte stack alignment can be relied
  // on there -- confirm.
1989 MaybeAlign Alignment;
1990 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1991 Arg.getSimpleValueType() != MVT::f80)
1992 Alignment = MaybeAlign(4);
1993 return DAG.getStore(
1994 Chain, dl, Arg, PtrOff,
  // NOTE(review): this listing skips line 1995, so the getStore operand that
  // sits between PtrOff and Alignment is not visible here.
1996 Alignment);
1997}
1998
1999/// Emit a load of return address if tail call
2000/// optimization is performed and it is required.
///
/// The body performs the load unconditionally; IsTailCall, Is64Bit and FPDiff
/// are not read here, so deciding whether the load is needed is left to the
/// caller.
///
/// \param OutRetAddr [out] receives the value loaded from the return address
///        slot.
/// \returns result #1 of the load node (the caller assigns it to its chain).
2001SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
2002 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
2003 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
2004 // Adjust the Return address stack slot.
  // OutRetAddr temporarily holds the result of getReturnAddressFrameIndex(); it
  // is used as the load address below and then overwritten with the load.
2005 EVT VT = getPointerTy(DAG.getDataLayout());
2006 OutRetAddr = getReturnAddressFrameIndex(DAG);
2007
2008 // Load the "old" Return address.
2009 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
  // Hand back result #1 of the load node rather than the loaded value itself.
2010 return SDValue(OutRetAddr.getNode(), 1);
2011}
2012
2013/// Emit a store of the return address if tail call
2014/// optimization is performed and it is required (FPDiff!=0).
///
/// When FPDiff is zero the incoming Chain is returned untouched. Otherwise a
/// fixed stack object of SlotSize bytes is created at offset FPDiff - SlotSize,
/// the value passed as RetAddrFrIdx is stored to it, and the store is returned
/// as the new chain.
// NOTE(review): the listing skips line 2015, which holds the start of the
// signature (return type, function name and the leading parameters); the body
// below uses DAG and MF, so they are presumably declared there -- confirm.
2016 SDValue Chain, SDValue RetAddrFrIdx,
2017 EVT PtrVT, unsigned SlotSize,
2018 int FPDiff, const SDLoc &dl) {
2019 // Store the return address to the appropriate stack slot.
2020 if (!FPDiff) return Chain;
2021 // Calculate the new stack slot for the return address.
  // FPDiff is widened to int64_t before the subtraction so that the unsigned
  // SlotSize is converted to a signed 64-bit value instead of the arithmetic
  // being carried out in 32-bit unsigned.
  // NOTE(review): the trailing `false` is presumably the IsImmutable flag of
  // CreateFixedObject -- confirm.
2022 int NewReturnAddrFI =
2023 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
2024 false);
2025 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
  // RetAddrFrIdx is the value being stored; NewRetAddrFrIdx is the address.
  // NOTE(review): the listing skips line 2027, i.e. the start of the pointer
  // info expression whose argument list is closed on the next line.
2026 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
2028 DAG.getMachineFunction(), NewReturnAddrFI));
2029 return Chain;
2030}
2031
2032/// Returns a vector shuffle node (not just a mask) for a movs{s|d}, movd
2033/// operation of specified width.
///
/// The shuffle mask built here is <NumElems, 1, 2, ..., NumElems - 1>, where
/// NumElems is the element count of VT. The node is created with
/// DAG.getVectorShuffle(VT, dl, V1, V2, Mask).
// NOTE(review): with the usual two-operand shuffle numbering, index NumElems
// selects element 0 of V2 and indices 1..NumElems-1 select from V1 -- confirm
// against the getVectorShuffle documentation.
2034SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
2035 SDValue V1, SDValue V2) const {
2036 unsigned NumElems = VT.getVectorNumElements();
2037 SmallVector<int, 8> Mask;
  // Lane 0 gets index NumElems; every other lane i keeps index i.
2038 Mask.push_back(NumElems);
2039 for (unsigned i = 1; i != NumElems; ++i)
2040 Mask.push_back(i);
2041 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
2042}
2043
2044// Returns the type of copying which is required to set up a byval argument to
2045// a tail-called function. This isn't needed for non-tail calls, because they
2046// always need the equivalent of CopyOnce, but tail-calls sometimes need two to
2047// avoid clobbering another argument (CopyViaTemp), and sometimes can be
2048// optimised to zero copies when forwarding an argument from the caller's
2049// caller (NoCopy).
//
// Src is the address the byval data currently lives at and Dst the address it
// must occupy for the callee. The decision is based only on the kind of node
// Src/Dst are and, for frame indices, on their MachineFrameInfo offsets; the
// Flags parameter is not read in the lines visible here.
2050X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
2051 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2052 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
2053
2054 // Globals are always safe to copy from.
  // NOTE(review): the listing skips line 2055, which must hold the condition
  // guarding this return (presumably a check that Src is a global) -- confirm
  // against the real file; as printed the return looks unconditional.
2056 return CopyOnce;
2057
2058 // Can only analyse frame index nodes, conservatively assume we need a
2059 // temporary.
2060 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2061 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2062 if (!SrcFrameIdxNode || !DstFrameIdxNode)
2063 return CopyViaTemp;
2064
2065 int SrcFI = SrcFrameIdxNode->getIndex();
2066 int DstFI = DstFrameIdxNode->getIndex();
2067 assert(MFI.isFixedObjectIndex(DstFI) &&
2068 "byval passed in non-fixed stack slot");
2069
2070 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2071 int64_t DstOffset = MFI.getObjectOffset(DstFI);
2072
2073 // If the source is in the local frame, then the copy to the argument
2074 // memory is always valid.
  // "Local" here means: not a fixed object, or a fixed object at a negative
  // offset. The inner FixedSrc test is redundant, since the right-hand side of
  // || is only evaluated when FixedSrc is true.
2075 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2076 if (!FixedSrc || (FixedSrc && SrcOffset < 0))
2077 return CopyOnce;
2078
2079 // If the value is already in the correct location, then no copying is
2080 // needed. If not, then we need to copy via a temporary.
2081 if (SrcOffset == DstOffset)
2082 return NoCopy;
2083 else
2084 return CopyViaTemp;
2085}
2086
2087SDValue
2088X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2089 SmallVectorImpl<SDValue> &InVals) const {
2090 SelectionDAG &DAG = CLI.DAG;
2091 SDLoc &dl = CLI.DL;
2092 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2093 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2094 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2095 SDValue Chain = CLI.Chain;
2096 SDValue Callee = CLI.Callee;
2097 CallingConv::ID CallConv = CLI.CallConv;
2098 bool &isTailCall = CLI.IsTailCall;
2099 bool isVarArg = CLI.IsVarArg;
2100 const auto *CB = CLI.CB;
2101
2102 MachineFunction &MF = DAG.getMachineFunction();
2103 bool Is64Bit = Subtarget.is64Bit();
2104 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2105 bool ShouldGuaranteeTCO = shouldGuaranteeTCO(
2106 CallConv, MF.getTarget().Options.GuaranteedTailCallOpt);
2107 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2108 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2109 CB->hasFnAttr("no_caller_saved_registers"));
2110 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2111 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2112 const Module *M = MF.getFunction().getParent();
2113
2114 // If the indirect call target has the nocf_check attribute, the call needs
2115 // the NOTRACK prefix. For simplicity just disable tail calls as there are
2116 // so many variants.
2117 // FIXME: This will cause backend errors if the user forces the issue.
2118 bool IsNoTrackIndirectCall = IsIndirectCall && CB->doesNoCfCheck() &&
2119 M->getModuleFlag("cf-protection-branch");
2120 if (IsNoTrackIndirectCall)
2121 isTailCall = false;
2122
2123 MachineFunction::CallSiteInfo CSInfo;
2124 if (CallConv == CallingConv::X86_INTR)
2125 report_fatal_error("X86 interrupts may not be called directly");
2126
2127 // Set type id for call site info.
2128 setTypeIdForCallsiteInfo(CB, MF, CSInfo);
2129
2130 if (IsIndirectCall && !IsWin64 &&
2131 M->getModuleFlag("import-call-optimization"))
2132 errorUnsupported(DAG, dl,
2133 "Indirect calls must have a normal calling convention if "
2134 "Import Call Optimization is enabled");
2135
2136 // Analyze operands of the call, assigning locations to each operand.
2138 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2139
2140 // Allocate shadow area for Win64.
2141 if (IsWin64)
2142 CCInfo.AllocateStack(32, Align(8));
2143
2144 CCInfo.AnalyzeArguments(Outs, CC_X86);
2145
2146 // In vectorcall calling convention a second pass is required for the HVA
2147 // types.
2148 if (CallingConv::X86_VectorCall == CallConv) {
2149 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2150 }
2151
2152 // We cannot guarantee TCO for mismatched calling conventions.
2153 if (isTailCall && ShouldGuaranteeTCO) {
2154 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
2155 isTailCall = (CallConv == CallerCC);
2156 }
2157
2158 // Check if this tail call is a "sibling" call, which is loosely defined to
2159 // be a tail call that doesn't require heroics like moving the return
2160 // address or swapping byval arguments. We treat some musttail calls as
2161 // sibling calls to avoid unnecessary argument copies.
2162 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2163 bool IsSibcall = false;
2164 if (isTailCall) {
2165 IsSibcall = isEligibleForSiblingCallOpt(CLI, CCInfo, ArgLocs);
2166 isTailCall = IsSibcall || IsMustTail || ShouldGuaranteeTCO;
2167 }
2168
2169 if (isTailCall)
2170 ++NumTailCalls;
2171
2172 if (IsMustTail && !isTailCall)
2173 report_fatal_error("failed to perform tail call elimination on a call "
2174 "site marked musttail");
2175
2176 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2177 "Var args not supported with calling convention fastcc, ghc or hipe");
2178
2179 // Get a count of how many bytes are to be pushed on the stack.
2180 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2181 if (IsSibcall)
2182 // This is a sibcall. The memory operands are available in caller's
2183 // own caller's stack.
2184 NumBytes = 0;
2185 else if (ShouldGuaranteeTCO && canGuaranteeTCO(CallConv))
2186 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2187
2188 // A sibcall is ABI-compatible and does not need to adjust the stack pointer.
2189 int FPDiff = 0;
2190 if (isTailCall && ShouldGuaranteeTCO && !IsSibcall) {
2191 // Lower arguments at fp - stackoffset + fpdiff.
2192 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2193
2194 FPDiff = NumBytesCallerPushed - NumBytes;
2195
2196 // Set the delta of movement of the returnaddr stackslot.
2197 // But only set if delta is greater than previous delta.
2198 if (FPDiff < X86Info->getTCReturnAddrDelta())
2199 X86Info->setTCReturnAddrDelta(FPDiff);
2200 }
2201
2202 unsigned NumBytesToPush = NumBytes;
2203 unsigned NumBytesToPop = NumBytes;
2204
2206 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2207
2208 // If we are doing a tail-call, any byval arguments will be written to stack
2209 // space which was used for incoming arguments. If any of the values being used
2210 // are incoming byval arguments to this function, then they might be
2211 // overwritten by the stores of the outgoing arguments. To avoid this, we
2212 // need to make a temporary copy of them in local stack space, then copy back
2213 // to the argument area.
2214 // FIXME: There's potential to improve the code by using virtual registers for
2215 // temporary storage, and letting the register allocator spill if needed.
2216 SmallVector<SDValue, 8> ByValTemporaries;
2217 SDValue ByValTempChain;
2218 if (isTailCall) {
2219 // Use null SDValue to mean "no temporary recorded for this arg index".
2220 ByValTemporaries.assign(OutVals.size(), SDValue());
2221
2222 SmallVector<SDValue, 8> ByValCopyChains;
2223 for (const CCValAssign &VA : ArgLocs) {
2224 unsigned ArgIdx = VA.getValNo();
2225 SDValue Src = OutVals[ArgIdx];
2226 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2227
2228 if (!Flags.isByVal())
2229 continue;
2230
2231 auto PtrVT = getPointerTy(DAG.getDataLayout());
2232
2233 if (!StackPtr.getNode())
2234 StackPtr =
2235 DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT);
2236
2237 // Destination: where this byval should live in the callee’s frame
2238 // after the tail call.
2239 int64_t Offset = VA.getLocMemOffset() + FPDiff;
2240 uint64_t Size = VA.getLocVT().getFixedSizeInBits() / 8;
2242 /*IsImmutable=*/true);
2243 SDValue Dst = DAG.getFrameIndex(FI, PtrVT);
2244
2245 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2246
2247 if (Copy == NoCopy) {
2248 // If the argument is already at the correct offset on the stack
2249 // (because we are forwarding a byval argument from our caller), we
2250 // don't need any copying.
2251 continue;
2252 } else if (Copy == CopyOnce) {
2253 // If the argument is in our local stack frame, no other argument
2254 // preparation can clobber it, so we can copy it to the final location
2255 // later.
2256 ByValTemporaries[ArgIdx] = Src;
2257 } else {
2258 assert(Copy == CopyViaTemp && "unexpected enum value");
2259 // If we might be copying this argument from the outgoing argument
2260 // stack area, we need to copy via a temporary in the local stack
2261 // frame.
2262 MachineFrameInfo &MFI = MF.getFrameInfo();
2263 int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(),
2264 Flags.getNonZeroByValAlign(),
2265 /*isSS=*/false);
2266 SDValue Temp =
2267 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2268
2269 SDValue CopyChain =
2270 CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl);
2271 ByValCopyChains.push_back(CopyChain);
2272 ByValTemporaries[ArgIdx] = Temp;
2273 }
2274 }
2275 if (!ByValCopyChains.empty())
2276 ByValTempChain =
2277 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2278 }
2279
2280 // If we have an inalloca argument, all stack space has already been allocated
2281 // for us and will be right at the top of the stack. We don't support multiple
2282 // arguments passed in memory when using inalloca.
2283 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2284 NumBytesToPush = 0;
2285 if (!ArgLocs.back().isMemLoc())
2286 report_fatal_error("cannot use inalloca attribute on a register "
2287 "parameter");
2288 if (ArgLocs.back().getLocMemOffset() != 0)
2289 report_fatal_error("any parameter with the inalloca attribute must be "
2290 "the only memory argument");
2291 } else if (CLI.IsPreallocated) {
2292 assert(ArgLocs.back().isMemLoc() &&
2293 "cannot use preallocated attribute on a register "
2294 "parameter");
2295 SmallVector<size_t, 4> PreallocatedOffsets;
2296 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2297 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2298 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2299 }
2300 }
2301 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2302 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2303 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2304 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2305 NumBytesToPush = 0;
2306 }
2307
2308 if (!IsSibcall && !IsMustTail)
2309 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2310 NumBytes - NumBytesToPush, dl);
2311
2312 SDValue RetAddrFrIdx;
2313 // Load return address for tail calls.
2314 if (isTailCall && FPDiff)
2315 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2316 Is64Bit, FPDiff, dl);
2317
2319 SmallVector<SDValue, 8> MemOpChains;
2320
2321 // The next loop assumes that the locations are in the same order of the
2322 // input arguments.
2323 assert(isSortedByValueNo(ArgLocs) &&
2324 "Argument Location list must be sorted before lowering");
2325
2326 // Walk the register/memloc assignments, inserting copies/loads. In the case
2327 // of tail call optimization arguments are handled later.
2328 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2329 ++I, ++OutIndex) {
2330 assert(OutIndex < Outs.size() && "Invalid Out index");
2331 // Skip inalloca/preallocated arguments, they have already been written.
2332 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2333 if (Flags.isInAlloca() || Flags.isPreallocated())
2334 continue;
2335
2336 CCValAssign &VA = ArgLocs[I];
2337 EVT RegVT = VA.getLocVT();
2338 SDValue Arg = OutVals[OutIndex];
2339 bool isByVal = Flags.isByVal();
2340
2341 // Promote the value if needed.
2342 switch (VA.getLocInfo()) {
2343 default: llvm_unreachable("Unknown loc info!");
2344 case CCValAssign::Full: break;
2345 case CCValAssign::SExt:
2346 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2347 break;
2348 case CCValAssign::ZExt:
2349 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2350 break;
2351 case CCValAssign::AExt:
2352 if (Arg.getValueType().isVector() &&
2353 Arg.getValueType().getVectorElementType() == MVT::i1)
2354 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2355 else if (RegVT.is128BitVector()) {
2356 // Special case: passing MMX values in XMM registers.
2357 Arg = DAG.getBitcast(MVT::i64, Arg);
2358 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2359 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2360 } else
2361 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2362 break;
2363 case CCValAssign::BCvt:
2364 Arg = DAG.getBitcast(RegVT, Arg);
2365 break;
2366 case CCValAssign::Indirect: {
2367 if (isByVal) {
2368 // Memcpy the argument to a temporary stack slot to prevent
2369 // the caller from seeing any modifications the callee may make
2370 // as guaranteed by the `byval` attribute.
2371 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2372 Flags.getByValSize(),
2373 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2374 SDValue StackSlot =
2375 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2376 Chain =
2377 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2378 // From now on treat this as a regular pointer
2379 Arg = StackSlot;
2380 isByVal = false;
2381 } else {
2382 // Store the argument.
2383 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2384 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2385 Chain = DAG.getStore(
2386 Chain, dl, Arg, SpillSlot,
2388 Arg = SpillSlot;
2389 }
2390 break;
2391 }
2392 }
2393
2394 if (VA.needsCustom()) {
2395 assert(VA.getValVT() == MVT::v64i1 &&
2396 "Currently the only custom case is when we split v64i1 to 2 regs");
2397 // Split v64i1 value into two registers
2398 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2399 } else if (VA.isRegLoc()) {
2400 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2401 const TargetOptions &Options = DAG.getTarget().Options;
2402 if (Options.EmitCallSiteInfo)
2403 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2404 if (isVarArg && IsWin64) {
2405 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2406 // shadow reg if callee is a varargs function.
2407 Register ShadowReg;
2408 switch (VA.getLocReg()) {
2409 case X86::XMM0: ShadowReg = X86::RCX; break;
2410 case X86::XMM1: ShadowReg = X86::RDX; break;
2411 case X86::XMM2: ShadowReg = X86::R8; break;
2412 case X86::XMM3: ShadowReg = X86::R9; break;
2413 }
2414 if (ShadowReg)
2415 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2416 }
2417 } else if (!IsSibcall && (!isTailCall || (isByVal && !IsMustTail))) {
2418 assert(VA.isMemLoc());
2419 if (!StackPtr.getNode())
2420 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2422 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2423 dl, DAG, VA, Flags, isByVal));
2424 }
2425 }
2426
2427 if (!MemOpChains.empty())
2428 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2429
2430 if (Subtarget.isPICStyleGOT()) {
2431 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2432 // GOT pointer.
2433 if (!isTailCall) {
2434 // Only PLT calls (GlobalAddress or ExternalSymbol) require the GOT in
2435 // EBX. Indirect calls through a register or an absolute address do not
2436 // go through the PLT and do not need EBX to hold the GOT base.
2437 if ((Callee->getOpcode() == ISD::GlobalAddress ||
2438 Callee->getOpcode() == ISD::ExternalSymbol))
2439 RegsToPass.push_back(std::make_pair(
2440 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2441 getPointerTy(DAG.getDataLayout()))));
2442 } else {
2443 // If we are tail calling and generating PIC/GOT style code load the
2444 // address of the callee into ECX. The value in ecx is used as target of
2445 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2446 // for tail calls on PIC/GOT architectures. Normally we would just put the
2447 // address of GOT into ebx and then call target@PLT. But for tail calls
2448 // ebx would be restored (since ebx is callee saved) before jumping to the
2449 // target@PLT.
2450
2451 // Note: The actual moving to ECX is done further down.
2452 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2453 if (G && !G->getGlobal()->hasLocalLinkage() &&
2454 G->getGlobal()->hasDefaultVisibility())
2455 Callee = LowerGlobalAddress(Callee, DAG);
2456 else if (isa<ExternalSymbolSDNode>(Callee))
2457 Callee = LowerExternalSymbol(Callee, DAG);
2458 }
2459 }
2460
2461 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2462 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2463 // From AMD64 ABI document:
2464 // For calls that may call functions that use varargs or stdargs
2465 // (prototype-less calls or calls to functions containing ellipsis (...) in
2466 // the declaration) %al is used as hidden argument to specify the number
2467 // of SSE registers used. The contents of %al do not need to match exactly
2468 // the number of registers, but must be an ubound on the number of SSE
2469 // registers used and is in the range 0 - 8 inclusive.
2470
2471 // Count the number of XMM registers allocated.
2472 static const MCPhysReg XMMArgRegs[] = {
2473 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2474 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2475 };
2476 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2477 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2478 && "SSE registers cannot be used when SSE is disabled");
2479 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2480 DAG.getConstant(NumXMMRegs, dl,
2481 MVT::i8)));
2482 }
2483
2484 if (isVarArg && IsMustTail) {
2485 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2486 for (const auto &F : Forwards) {
2487 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2488 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2489 }
2490 }
2491
2492 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2493 // don't need this because the eligibility check rejects calls that require
2494 // shuffling arguments passed in memory.
2495 if (isTailCall && !IsSibcall) {
2496 // Force all the incoming stack arguments to be loaded from the stack
2497 // before any new outgoing arguments or the return address are stored to the
2498 // stack, because the outgoing stack slots may alias the incoming argument
2499 // stack slots, and the alias isn't otherwise explicit. This is slightly
2500 // more conservative than necessary, because it means that each store
2501 // effectively depends on every argument instead of just those arguments it
2502 // would clobber.
2503 Chain = DAG.getStackArgumentTokenFactor(Chain);
2504
2505 if (ByValTempChain)
2506 Chain =
2507 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain);
2508
2509 SmallVector<SDValue, 8> MemOpChains2;
2510 SDValue FIN;
2511 int FI = 0;
2512 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2513 ++I, ++OutsIndex) {
2514 CCValAssign &VA = ArgLocs[I];
2515
2516 if (VA.isRegLoc()) {
2517 if (VA.needsCustom()) {
2518 assert((CallConv == CallingConv::X86_RegCall) &&
2519 "Expecting custom case only in regcall calling convention");
2520 // This means that we are in special case where one argument was
2521 // passed through two register locations - Skip the next location
2522 ++I;
2523 }
2524
2525 continue;
2526 }
2527
2528 assert(VA.isMemLoc());
2529 SDValue Arg = OutVals[OutsIndex];
2530 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2531 // Skip inalloca/preallocated arguments. They don't require any work.
2532 if (Flags.isInAlloca() || Flags.isPreallocated())
2533 continue;
2534 // Create frame index.
2535 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2536 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2537 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2538 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2539
2540 if (Flags.isByVal()) {
2541 if (SDValue ByValSrc = ByValTemporaries[OutsIndex]) {
2542 auto PtrVT = getPointerTy(DAG.getDataLayout());
2543 SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
2544
2546 ByValSrc, DstAddr, Chain, Flags, DAG, dl));
2547 }
2548 } else {
2549 // Store relative to framepointer.
2550 MemOpChains2.push_back(DAG.getStore(
2551 Chain, dl, Arg, FIN,
2553 }
2554 }
2555
2556 if (!MemOpChains2.empty())
2557 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2558
2559 // Store the return address to the appropriate stack slot.
2560 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2562 RegInfo->getSlotSize(), FPDiff, dl);
2563 }
2564
2565 // Build a sequence of copy-to-reg nodes chained together with token chain
2566 // and glue operands which copy the outgoing args into registers.
2567 SDValue InGlue;
2568 for (const auto &[Reg, N] : RegsToPass) {
2569 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
2570 InGlue = Chain.getValue(1);
2571 }
2572
2573 bool IsImpCall = false;
2574 bool IsCFGuardCall = false;
2575 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2576 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2577 // In the 64-bit large code model, we have to make all calls
2578 // through a register, since the call instruction's 32-bit
2579 // pc-relative offset may not be large enough to hold the whole
2580 // address.
2581 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2582 Callee->getOpcode() == ISD::ExternalSymbol) {
2583 // Lower direct calls to global addresses and external symbols. Setting
2584 // ForCall to true here has the effect of removing WrapperRIP when possible
2585 // to allow direct calls to be selected without first materializing the
2586 // address into a register.
2587 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall);
2588 } else if (Subtarget.isTarget64BitILP32() &&
2589 Callee.getValueType() == MVT::i32) {
2590 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2591 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2592 } else if (Is64Bit && CB && isCFGuardCall(CB)) {
2593 // We'll use a specific pseudo instruction for tail calls to control flow
2594 // guard functions to guarantee the instruction used for the call. To do
2595 // this we need to unwrap the load now and use the CFG Func GV as the
2596 // callee.
2597 IsCFGuardCall = true;
2598 auto *LoadNode = cast<LoadSDNode>(Callee);
2599 GlobalAddressSDNode *GA =
2600 cast<GlobalAddressSDNode>(unwrapAddress(LoadNode->getBasePtr()));
2602 "CFG Call should be to a guard function");
2603 assert(LoadNode->getOffset()->isUndef() &&
2604 "CFG Function load should not have an offset");
2606 GA->getGlobal(), dl, GA->getValueType(0), 0, X86II::MO_NO_FLAG);
2607 }
2608
2610
2611 if (!IsSibcall && isTailCall && !IsMustTail) {
2612 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2613 InGlue = Chain.getValue(1);
2614 }
2615
2616 Ops.push_back(Chain);
2617 Ops.push_back(Callee);
2618
2619 if (isTailCall)
2620 Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
2621
2622 // Add argument registers to the end of the list so that they are known live
2623 // into the call.
2624 for (const auto &[Reg, N] : RegsToPass)
2625 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2626
2627 // Add a register mask operand representing the call-preserved registers.
2628 const uint32_t *Mask = [&]() {
2629 auto AdaptedCC = CallConv;
2630 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2631 // use X86_INTR calling convention because it has the same CSR mask
2632 // (same preserved registers).
2633 if (HasNCSR)
2635 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2636 // to use the CSR_NoRegs_RegMask.
2637 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2638 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2639 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2640 }();
2641 assert(Mask && "Missing call preserved mask for calling convention");
2642
2643 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
2644 X86Info->setFPClobberedByCall(true);
2645 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2646 X86Info->setFPClobberedByInvoke(true);
2647 }
2648 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
2649 X86Info->setBPClobberedByCall(true);
2650 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2651 X86Info->setBPClobberedByInvoke(true);
2652 }
2653
2654 // If this is an invoke in a 32-bit function using a funclet-based
2655 // personality, assume the function clobbers all registers. If an exception
2656 // is thrown, the runtime will not restore CSRs.
2657 // FIXME: Model this more precisely so that we can register allocate across
2658 // the normal edge and spill and fill across the exceptional edge.
2659 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2660 const Function &CallerFn = MF.getFunction();
2661 EHPersonality Pers =
2662 CallerFn.hasPersonalityFn()
2665 if (isFuncletEHPersonality(Pers))
2666 Mask = RegInfo->getNoPreservedMask();
2667 }
2668
2669 // Define a new register mask from the existing mask.
2670 uint32_t *RegMask = nullptr;
2671
2672 // In some calling conventions we need to remove the used physical registers
2673 // from the reg mask. Create a new RegMask for such calling conventions.
2674 // RegMask for calling conventions that disable only return registers (e.g.
2675 // preserve_most) will be modified later in LowerCallResult.
2676 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2677 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2678 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2679
2680 // Allocate a new Reg Mask and copy Mask.
2681 RegMask = MF.allocateRegMask();
2682 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2683 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2684
2685 // Make sure all sub registers of the argument registers are reset
2686 // in the RegMask.
2687 if (ShouldDisableArgRegs) {
2688 for (auto const &RegPair : RegsToPass)
2689 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2690 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2691 }
2692
2693 // Create the RegMask Operand according to our updated mask.
2694 Ops.push_back(DAG.getRegisterMask(RegMask));
2695 } else {
2696 // Create the RegMask Operand according to the static mask.
2697 Ops.push_back(DAG.getRegisterMask(Mask));
2698 }
2699
2700 if (InGlue.getNode())
2701 Ops.push_back(InGlue);
2702
2703 if (isTailCall) {
2704 // We used to do:
2705 //// If this is the first return lowered for this function, add the regs
2706 //// to the liveout set for the function.
2707 // This isn't right, although it's probably harmless on x86; liveouts
2708 // should be computed from returns not tail calls. Consider a void
2709 // function making a tail call to a function returning int.
2711 auto Opcode =
2712 IsCFGuardCall ? X86ISD::TC_RETURN_GLOBALADDR : X86ISD::TC_RETURN;
2713 SDValue Ret = DAG.getNode(Opcode, dl, MVT::Other, Ops);
2714
2715 if (IsCFICall)
2716 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2717
2718 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2719 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2720 return Ret;
2721 }
2722
2723 // Returns a chain & a glue for retval copy to use.
2724 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2725 if (IsImpCall) {
2726 Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
2727 } else if (IsNoTrackIndirectCall) {
2728 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2729 } else if (IsCFGuardCall) {
2730 Chain = DAG.getNode(X86ISD::CALL_GLOBALADDR, dl, NodeTys, Ops);
2731 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2732 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2733 // expanded to the call, directly followed by a special marker sequence and
2734 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2735 assert(!isTailCall &&
2736 "tail calls cannot be marked with clang.arc.attachedcall");
2737 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2738
2739 // Add a target global address for the retainRV/claimRV runtime function
2740 // just before the call target.
2742 auto PtrVT = getPointerTy(DAG.getDataLayout());
2743 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2744 Ops.insert(Ops.begin() + 1, GA);
2745 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2746 } else {
2747 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2748 }
2749
2750 if (IsCFICall)
2751 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2752
2753 InGlue = Chain.getValue(1);
2754 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2755 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2756
2757 // Save heapallocsite metadata.
2758 if (CLI.CB)
2759 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2760 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2761
2762 // Create the CALLSEQ_END node.
2763 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2764 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2766 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2767 } else if (hasCalleePopSRet(Outs, ArgLocs, Subtarget)) {
2768 // If this call passes a struct-return pointer, the callee
2769 // pops that struct pointer.
2770 NumBytesForCalleeToPop = 4;
2771 }
2772
2773 // Returns a glue for retval copy to use.
2774 if (!IsSibcall) {
2775 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2776 InGlue, dl);
2777 InGlue = Chain.getValue(1);
2778 }
2779
2780 if (CallingConv::PreserveNone == CallConv)
2781 for (const ISD::OutputArg &Out : Outs) {
2782 if (Out.Flags.isSwiftSelf() || Out.Flags.isSwiftAsync() ||
2783 Out.Flags.isSwiftError()) {
2784 errorUnsupported(DAG, dl,
2785 "Swift attributes can't be used with preserve_none");
2786 break;
2787 }
2788 }
2789
2790 // Handle result values, copying them out of physregs into vregs that we
2791 // return.
2792 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2793 InVals, RegMask);
2794}
2795
2796//===----------------------------------------------------------------------===//
2797// Fast Calling Convention (tail call) implementation
2798//===----------------------------------------------------------------------===//
2799
// Like stdcall, the callee cleans up the arguments, except that ECX is
// reserved for storing the address of the tail-called function. Only 2
// registers are free for argument passing (inreg). Tail call optimization is
// performed provided:
//                * tailcallopt is enabled
//                * caller/callee are fastcc
// On the X86_64 architecture with GOT-style position-independent code, only
// local (within-module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld
// for example.)
// If a tail-called callee has more arguments than the caller, the caller needs
// to make sure that there is room to move the RETADDR to. This is achieved by
// reserving an area the size of the argument delta right after the original
// RETADDR, but before the saved frame pointer or the spilled registers,
// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
// stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..
2826
2827/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
2828/// requirement.
2829unsigned
2830X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2831 SelectionDAG &DAG) const {
2832 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2833 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2834 assert(StackSize % SlotSize == 0 &&
2835 "StackSize must be a multiple of SlotSize");
2836 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2837}
2838
2839/// Return true if the given stack call argument is already available in the
2840/// same position (relatively) of the caller's incoming argument stack.
2841static
2843 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2844 const X86InstrInfo *TII, const CCValAssign &VA) {
2845 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2846
2847 for (;;) {
2848 // Look through nodes that don't alter the bits of the incoming value.
2849 unsigned Op = Arg.getOpcode();
2850 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2851 Op == ISD::AssertZext) {
2852 Arg = Arg.getOperand(0);
2853 continue;
2854 }
2855 if (Op == ISD::TRUNCATE) {
2856 const SDValue &TruncInput = Arg.getOperand(0);
2857 if (TruncInput.getOpcode() == ISD::AssertZext &&
2858 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2859 Arg.getValueType()) {
2860 Arg = TruncInput.getOperand(0);
2861 continue;
2862 }
2863 }
2864 break;
2865 }
2866
2867 int FI = INT_MAX;
2868 if (Arg.getOpcode() == ISD::CopyFromReg) {
2869 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2870 if (!VR.isVirtual())
2871 return false;
2872 MachineInstr *Def = MRI->getVRegDef(VR);
2873 if (!Def)
2874 return false;
2875 if (!Flags.isByVal()) {
2876 if (!TII->isLoadFromStackSlot(*Def, FI))
2877 return false;
2878 } else {
2879 unsigned Opcode = Def->getOpcode();
2880 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2881 Opcode == X86::LEA64_32r) &&
2882 Def->getOperand(1).isFI()) {
2883 FI = Def->getOperand(1).getIndex();
2884 Bytes = Flags.getByValSize();
2885 } else
2886 return false;
2887 }
2888 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2889 if (Flags.isByVal())
2890 // ByVal argument is passed in as a pointer but it's now being
2891 // dereferenced. e.g.
2892 // define @foo(%struct.X* %A) {
2893 // tail call @bar(%struct.X* byval %A)
2894 // }
2895 return false;
2896 SDValue Ptr = Ld->getBasePtr();
2898 if (!FINode)
2899 return false;
2900 FI = FINode->getIndex();
2901 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2903 FI = FINode->getIndex();
2904 Bytes = Flags.getByValSize();
2905 } else
2906 return false;
2907
2908 assert(FI != INT_MAX);
2909 if (!MFI.isFixedObjectIndex(FI))
2910 return false;
2911
2912 if (Offset != MFI.getObjectOffset(FI))
2913 return false;
2914
2915 // If this is not byval, check that the argument stack object is immutable.
2916 // inalloca and argument copy elision can create mutable argument stack
2917 // objects. Byval objects can be mutated, but a byval call intends to pass the
2918 // mutated memory.
2919 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2920 return false;
2921
2922 if (VA.getLocVT().getFixedSizeInBits() >
2924 // If the argument location is wider than the argument type, check that any
2925 // extension flags match.
2926 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2927 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2928 return false;
2929 }
2930 }
2931
2932 return Bytes == MFI.getObjectSize(FI);
2933}
2934
2935static bool
2937 Register CallerSRetReg) {
2938 const auto &Outs = CLI.Outs;
2939 const auto &OutVals = CLI.OutVals;
2940
2941 // We know the caller has a sret pointer argument (CallerSRetReg). Locate the
2942 // operand index within the callee that may have a sret pointer too.
2943 unsigned Pos = 0;
2944 for (unsigned E = Outs.size(); Pos != E; ++Pos)
2945 if (Outs[Pos].Flags.isSRet())
2946 break;
2947 // Bail out if the callee has not any sret argument.
2948 if (Pos == Outs.size())
2949 return false;
2950
2951 // At this point, either the caller is forwarding its sret argument to the
2952 // callee, or the callee is being passed a different sret pointer. We now look
2953 // for a CopyToReg, where the callee sret argument is written into a new vreg
2954 // (which should later be %rax/%eax, if this is returned).
2955 SDValue SRetArgVal = OutVals[Pos];
2956 for (SDNode *User : SRetArgVal->users()) {
2957 if (User->getOpcode() != ISD::CopyToReg)
2958 continue;
2960 if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
2961 return true;
2962 }
2963
2964 return false;
2965}
2966
2967/// Check whether the call is eligible for sibling call optimization. Sibling
2968/// calls are loosely defined to be simple, profitable tail calls that only
2969/// require adjusting register parameters. We do not speculatively to optimize
2970/// complex calls that require lots of argument memory operations that may
2971/// alias.
2972///
2973/// Note that LLVM supports multiple ways, such as musttail, to force tail call
2974/// emission. Returning false from this function will not prevent tail call
2975/// emission in all cases.
2976bool X86TargetLowering::isEligibleForSiblingCallOpt(
2978 SmallVectorImpl<CCValAssign> &ArgLocs) const {
2979 SelectionDAG &DAG = CLI.DAG;
2980 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2981 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2982 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2983 SDValue Callee = CLI.Callee;
2984 CallingConv::ID CalleeCC = CLI.CallConv;
2985 bool isVarArg = CLI.IsVarArg;
2986
2987 if (!mayTailCallThisCC(CalleeCC))
2988 return false;
2989
2990 // If -tailcallopt is specified, make fastcc functions tail-callable.
2991 MachineFunction &MF = DAG.getMachineFunction();
2992 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2993 const Function &CallerF = MF.getFunction();
2994
2995 // If the function return type is x86_fp80 and the callee return type is not,
2996 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2997 // perform a tailcall optimization here.
2998 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2999 return false;
3000
3001 // Win64 functions have extra shadow space for argument homing. Don't do the
3002 // sibcall if the caller and callee have mismatched expectations for this
3003 // space.
3004 CallingConv::ID CallerCC = CallerF.getCallingConv();
3005 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
3006 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
3007 if (IsCalleeWin64 != IsCallerWin64)
3008 return false;
3009
3010 // If we are using a GOT, don't generate sibling calls to non-local,
3011 // default-visibility symbols. Tail calling such a symbol requires using a GOT
3012 // relocation, which forces early binding of the symbol. This breaks code that
3013 // require lazy function symbol resolution. Using musttail or
3014 // GuaranteedTailCallOpt will override this.
3015 if (Subtarget.isPICStyleGOT()) {
3016 if (isa<ExternalSymbolSDNode>(Callee))
3017 return false;
3018 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3019 if (!G->getGlobal()->hasLocalLinkage() &&
3020 G->getGlobal()->hasDefaultVisibility())
3021 return false;
3022 }
3023 }
3024
3025 // Look for obvious safe cases to perform tail call optimization that do not
3026 // require ABI changes. This is what gcc calls sibcall.
3027
3028 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
3029 // emit a special epilogue.
3030 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3031 if (RegInfo->hasStackRealignment(MF))
3032 return false;
3033
3034 // Avoid sibcall optimization if we are an sret return function and the callee
3035 // is incompatible, unless such premises are proven wrong. See comment in
3036 // LowerReturn about why hasStructRetAttr is insufficient.
3037 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3038 // For a compatible tail call the callee must return our sret pointer. So it
3039 // needs to be (a) an sret function itself and (b) we pass our sret as its
3040 // sret. Condition #b is harder to determine.
3041 if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
3042 return false;
3043 } else if (hasCalleePopSRet(Outs, ArgLocs, Subtarget))
3044 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
3045 // expect that.
3046 return false;
3047
3048 // Do not sibcall optimize vararg calls unless all arguments are passed via
3049 // registers.
3050 LLVMContext &C = *DAG.getContext();
3051 if (isVarArg && !Outs.empty()) {
3052 // Optimizing for varargs on Win64 is unlikely to be safe without
3053 // additional testing.
3054 if (IsCalleeWin64 || IsCallerWin64)
3055 return false;
3056
3057 for (const auto &VA : ArgLocs)
3058 if (!VA.isRegLoc())
3059 return false;
3060 }
3061
3062 // If the call result is in ST0 / ST1, it needs to be popped off the x87
3063 // stack. Therefore, if it's not used by the call it is not safe to optimize
3064 // this into a sibcall.
3065 bool Unused = false;
3066 for (const auto &In : Ins) {
3067 if (!In.Used) {
3068 Unused = true;
3069 break;
3070 }
3071 }
3072 if (Unused) {
3074 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
3075 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3076 for (const auto &VA : RVLocs) {
3077 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
3078 return false;
3079 }
3080 }
3081
3082 // Check that the call results are passed in the same way.
3083 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3085 return false;
3086 // The callee has to preserve all registers the caller needs to preserve.
3087 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3088 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3089 if (CallerCC != CalleeCC) {
3090 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3091 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3092 return false;
3093 }
3094
3095 // The stack frame of the caller cannot be replaced by the tail-callee one's
3096 // if the function is required to preserve all the registers. Conservatively
3097 // prevent tail optimization even if hypothetically all the registers are used
3098 // for passing formal parameters or returning values.
3099 if (CallerF.hasFnAttribute("no_caller_saved_registers"))
3100 return false;
3101
3102 unsigned StackArgsSize = CCInfo.getStackSize();
3103
3104 // If the callee takes no arguments then go on to check the results of the
3105 // call.
3106 if (!Outs.empty()) {
3107 if (StackArgsSize > 0) {
3108 // Check if the arguments are already laid out in the right way as
3109 // the caller's fixed stack objects.
3110 MachineFrameInfo &MFI = MF.getFrameInfo();
3111 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3112 const X86InstrInfo *TII = Subtarget.getInstrInfo();
3113 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
3114 const CCValAssign &VA = ArgLocs[I];
3115 SDValue Arg = OutVals[I];
3116 ISD::ArgFlagsTy Flags = Outs[I].Flags;
3118 return false;
3119 if (!VA.isRegLoc()) {
3120 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
3121 TII, VA))
3122 return false;
3123 }
3124 }
3125 }
3126
3127 bool PositionIndependent = isPositionIndependent();
3128 // If the tailcall address may be in a register, then make sure it's
3129 // possible to register allocate for it. In 32-bit, the call address can
3130 // only target EAX, EDX, or ECX since the tail call must be scheduled after
3131 // callee-saved registers are restored. These happen to be the same
3132 // registers used to pass 'inreg' arguments so watch out for those.
3133 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
3134 !isa<ExternalSymbolSDNode>(Callee)) ||
3135 PositionIndependent)) {
3136 unsigned NumInRegs = 0;
3137 // In PIC we need an extra register to formulate the address computation
3138 // for the callee.
3139 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
3140
3141 for (const auto &VA : ArgLocs) {
3142 if (!VA.isRegLoc())
3143 continue;
3144 Register Reg = VA.getLocReg();
3145 switch (Reg) {
3146 default: break;
3147 case X86::EAX: case X86::EDX: case X86::ECX:
3148 if (++NumInRegs == MaxInRegs)
3149 return false;
3150 break;
3151 }
3152 }
3153 }
3154
3155 const MachineRegisterInfo &MRI = MF.getRegInfo();
3156 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3157 return false;
3158 }
3159
3160 bool CalleeWillPop =
3161 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
3163
3164 if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
3165 // If we have bytes to pop, the callee must pop them.
3166 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
3167 if (!CalleePopMatches)
3168 return false;
3169 } else if (CalleeWillPop && StackArgsSize > 0) {
3170 // If we don't have bytes to pop, make sure the callee doesn't pop any.
3171 return false;
3172 }
3173
3174 return true;
3175}
3176
3177/// Determines whether the callee is required to pop its own arguments.
3178/// Callee pop is necessary to support tail calls.
3180 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
3181 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
3182 // can guarantee TCO.
3183 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
3184 return true;
3185
3186 switch (CallingConv) {
3187 default:
3188 return false;
3193 return !is64Bit;
3194 }
3195}
return SDValue()
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static bool IsIndirectCall(const MachineInstr *MI)
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
const MCPhysReg ArgGPRs[]
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt)
Return true if the function is being made into a tailcall target by changing its ABI.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const M68kInstrInfo *TII, const CCValAssign &VA)
Return true if the given stack call argument is already available in the same position (relatively) o...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
This file defines ARC utility functions which are used by various parts of the compiler.
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static bool is64Bit(const char *name)
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
Lowers masks values (v*i1) to the local register values.
static void Passv64i1ArgInRegs(const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, SmallVectorImpl< std::pair< Register, SDValue > > &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget)
Breaks v64i1 value into two registers and adds the new node to the DAG.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget, SDValue *InGlue=nullptr)
Reads two 32 bit registers and creates a 64 bit mask value.
static ArrayRef< MCPhysReg > get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static bool isSortedByValueNo(ArrayRef< CCValAssign > ArgLocs)
static ArrayRef< MCPhysReg > get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static SDValue getPopFromX87Reg(SelectionDAG &DAG, SDValue Chain, const SDLoc &dl, Register Reg, EVT VT, SDValue Glue)
static bool mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI, Register CallerSRetReg)
static std::pair< MVT, unsigned > handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, const X86Subtarget &Subtarget)
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg)
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const SmallVectorImpl< CCValAssign > &ArgLocs, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
CCState - This class holds information needed while lowering arguments and return values.
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
void convertToReg(MCRegister Reg)
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition Function.h:879
Constant * getPersonalityFn() const
Get the personality function associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:175
LLVMContext & getContext() const
Definition IRBuilder.h:177
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:577
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:213
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setObjectZExt(int ObjectIdx, bool IsZExt)
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
static unsigned getRegMaskSize(unsigned NumRegs)
Returns number of elements needed for a regmask array.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
ArrayRef< std::pair< MCRegister, Register > > liveins() const
LLVM_ABI void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVMContext * getContext() const
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
Class to represent struct types.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const
Inserts necessary declarations for SSP (stack protection) purpose.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
Returns the target-specific address of the unsafe stack pointer.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual ArrayRef< MCPhysReg > getRoundingControlRegisters() const
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isAndroid() const
Tests whether the target is Android.
Definition Triple.h:821
bool isMusl() const
Tests whether the environment is musl-libc.
Definition Triple.h:836
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition Triple.h:759
bool isOSFuchsia() const
Definition Triple.h:661
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:161
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
const uint32_t * getNoPreservedMask() const override
bool hasSSE1() const
const Triple & getTargetTriple() const
bool useAVX512Regs() const
bool isCallingConvWin64(CallingConv::ID CC) const
bool isOSWindowsOrUEFI() const
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool useSoftFloat() const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
Return true if the target stores SafeStack pointer at a fixed offset in some non-standard address spa...
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const override
Inserts necessary declarations for SSP (stack protection) purpose.
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
SDValue unwrapAddress(SDValue N) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
Definition CallingConv.h:21
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition CallingConv.h:99
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition CallingConv.h:90
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:861
@ GlobalAddress
Definition ISDOpcodes.h:88
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:986
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:967
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ MO_NO_FLAG
MO_NO_FLAG - No flag for the operand.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ POP_FROM_X87_REG
The same as ISD::CopyFromReg except that this node makes it explicit that it may lower to an x87 FPU ...
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
LLVM_ABI bool isCFGuardCall(const CallBase *CB)
Definition CFGuard.cpp:318
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isCFGuardFunction(const GlobalValue *GV)
Definition CFGuard.cpp:323
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1970
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr Align Constant()
Allow constructions of constexpr Align.
Definition Alignment.h:88
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:323
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition ValueTypes.h:240
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isVectorOf(EVT EltVT) const
Return true if this is a vector with matching element type.
Definition ValueTypes.h:181
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
Describes a register that needs to be forwarded from the prologue to a musttail call.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
Type * RetTy
Same as OrigRetTy, or partially legalized for soft float libcalls.