LLVM 22.0.0git
AArch64PrologueEpilogue.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "AArch64Subtarget.h"
14#include "llvm/ADT/Statistic.h"
17#include "llvm/MC/MCContext.h"
18
19#define DEBUG_TYPE "frame-info"
20
21STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
22
23namespace llvm {
24
25static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
26 RTLIB::Libcall LC) {
27 return MO.isSymbol() &&
28 StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
29}
30
32 return AFI->hasStreamingModeChanges() &&
33 !MF.getSubtarget<AArch64Subtarget>().hasSVE();
34}
35
38 unsigned Opc = MBBI->getOpcode();
39 if (Opc == AArch64::CNTD_XPiI)
40 return true;
41
42 if (!requiresGetVGCall())
43 return false;
44
45 if (Opc == AArch64::BL)
46 return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
47
48 return Opc == TargetOpcode::COPY;
49}
50
51// Convenience function to determine whether I is part of the ZPR callee saves.
53 switch (I->getOpcode()) {
54 default:
55 return false;
56 case AArch64::LD1B_2Z_IMM:
57 case AArch64::ST1B_2Z_IMM:
58 case AArch64::STR_ZXI:
59 case AArch64::LDR_ZXI:
60 case AArch64::PTRUE_C_B:
61 return I->getFlag(MachineInstr::FrameSetup) ||
63 case AArch64::SEH_SaveZReg:
64 return true;
65 }
66}
67
68// Convenience function to determine whether I is part of the PPR callee saves.
70 switch (I->getOpcode()) {
71 default:
72 return false;
73 case AArch64::STR_PXI:
74 case AArch64::LDR_PXI:
75 return I->getFlag(MachineInstr::FrameSetup) ||
77 case AArch64::SEH_SavePReg:
78 return true;
79 }
80}
81
82// Convenience function to determine whether I is part of the SVE callee saves.
86
90 : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
91 Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
92 RegInfo(*Subtarget.getRegisterInfo()) {
93 TII = Subtarget.getInstrInfo();
94 AFI = MF.getInfo<AArch64FunctionInfo>();
95
96 HasFP = AFL.hasFP(MF);
97 NeedsWinCFI = AFL.needsWinCFI(MF);
98
99 // Windows unwind can't represent the required stack adjustments if we have
100 // both SVE callee-saves and dynamic stack allocations, and the frame pointer
101 // is before the SVE spills. The allocation of the frame pointer must be the
102 // last instruction in the prologue so the unwinder can restore the stack
103 // pointer correctly. (And there isn't any unwind opcode for `addvl sp, x29,
104 // -17`.)
105 //
106 // Because of this, we do spills in the opposite order on Windows: first SVE,
107 // then GPRs. The main side-effect of this is that it makes accessing
108 // parameters passed on the stack more expensive.
109 //
110 // We could consider rearranging the spills for simpler cases.
111 if (Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize()) {
112 if (AFI->hasStackHazardSlotIndex())
113 reportFatalUsageError("SME hazard padding is not supported on Windows");
114 SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
115 } else if (AFI->hasSplitSVEObjects()) {
116 SVELayout = SVEStackLayout::Split;
117 }
118}
119
122 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
123 bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
124 unsigned NewOpc;
125
126 // If the function contains streaming mode changes, we expect instructions
127 // to calculate the value of VG before spilling. Move past these instructions
128 // if necessary.
129 if (AFL.requiresSaveVG(MF)) {
130 auto &TLI = *Subtarget.getTargetLowering();
131 while (isVGInstruction(MBBI, TLI))
132 ++MBBI;
133 }
134
135 switch (MBBI->getOpcode()) {
136 default:
137 llvm_unreachable("Unexpected callee-save save/restore opcode!");
138 case AArch64::STPXi:
139 NewOpc = AArch64::STPXpre;
140 break;
141 case AArch64::STPDi:
142 NewOpc = AArch64::STPDpre;
143 break;
144 case AArch64::STPQi:
145 NewOpc = AArch64::STPQpre;
146 break;
147 case AArch64::STRXui:
148 NewOpc = AArch64::STRXpre;
149 break;
150 case AArch64::STRDui:
151 NewOpc = AArch64::STRDpre;
152 break;
153 case AArch64::STRQui:
154 NewOpc = AArch64::STRQpre;
155 break;
156 case AArch64::LDPXi:
157 NewOpc = AArch64::LDPXpost;
158 break;
159 case AArch64::LDPDi:
160 NewOpc = AArch64::LDPDpost;
161 break;
162 case AArch64::LDPQi:
163 NewOpc = AArch64::LDPQpost;
164 break;
165 case AArch64::LDRXui:
166 NewOpc = AArch64::LDRXpost;
167 break;
168 case AArch64::LDRDui:
169 NewOpc = AArch64::LDRDpost;
170 break;
171 case AArch64::LDRQui:
172 NewOpc = AArch64::LDRQpost;
173 break;
174 }
175 TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
176 int64_t MinOffset, MaxOffset;
177 bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
178 NewOpc, Scale, Width, MinOffset, MaxOffset);
179 (void)Success;
180 assert(Success && "unknown load/store opcode");
181
182 // If the first store isn't right where we want SP then we can't fold the
183 // update in so create a normal arithmetic instruction instead.
184 if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
185 CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
186 CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
187 // If we are destroying the frame, make sure we add the increment after the
188 // last frame operation.
189 if (FrameFlag == MachineInstr::FrameDestroy) {
190 ++MBBI;
191 // Also skip the SEH instruction, if needed
192 if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
193 ++MBBI;
194 }
195 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
196 StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
197 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
198 StackOffset::getFixed(CFAOffset));
199
200 return std::prev(MBBI);
201 }
202
203 // Get rid of the SEH code associated with the old instruction.
204 if (NeedsWinCFI) {
205 auto SEH = std::next(MBBI);
206 if (AArch64InstrInfo::isSEHInstruction(*SEH))
207 SEH->eraseFromParent();
208 }
209
210 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
211 MIB.addReg(AArch64::SP, RegState::Define);
212
213 // Copy all operands other than the immediate offset.
214 unsigned OpndIdx = 0;
215 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
216 ++OpndIdx)
217 MIB.add(MBBI->getOperand(OpndIdx));
218
219 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
220 "Unexpected immediate offset in first/last callee-save save/restore "
221 "instruction!");
222 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
223 "Unexpected base register in callee-save save/restore instruction!");
224 assert(CSStackSizeInc % Scale == 0);
225 MIB.addImm(CSStackSizeInc / (int)Scale);
226
227 MIB.setMIFlags(MBBI->getFlags());
228 MIB.setMemRefs(MBBI->memoperands());
229
230 // Generate a new SEH code that corresponds to the new instruction.
231 if (NeedsWinCFI) {
232 HasWinCFI = true;
233 AFL.insertSEH(*MIB, *TII, FrameFlag);
234 }
235
236 if (EmitCFI)
237 CFIInstBuilder(MBB, MBBI, FrameFlag)
238 .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
239
240 return std::prev(MBB.erase(MBBI));
241}
242
243// Fix up the SEH opcode associated with the save/restore instruction.
245 unsigned LocalStackSize) {
246 MachineOperand *ImmOpnd = nullptr;
247 unsigned ImmIdx = MBBI->getNumOperands() - 1;
248 switch (MBBI->getOpcode()) {
249 default:
250 llvm_unreachable("Fix the offset in the SEH instruction");
251 case AArch64::SEH_SaveFPLR:
252 case AArch64::SEH_SaveRegP:
253 case AArch64::SEH_SaveReg:
254 case AArch64::SEH_SaveFRegP:
255 case AArch64::SEH_SaveFReg:
256 case AArch64::SEH_SaveAnyRegI:
257 case AArch64::SEH_SaveAnyRegIP:
258 case AArch64::SEH_SaveAnyRegQP:
259 case AArch64::SEH_SaveAnyRegQPX:
260 ImmOpnd = &MBBI->getOperand(ImmIdx);
261 break;
262 }
263 if (ImmOpnd)
264 ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
265}
266
268 MachineInstr &MI, uint64_t LocalStackSize) const {
269 if (AArch64InstrInfo::isSEHInstruction(MI))
270 return;
271
272 unsigned Opc = MI.getOpcode();
273 unsigned Scale;
274 switch (Opc) {
275 case AArch64::STPXi:
276 case AArch64::STRXui:
277 case AArch64::STPDi:
278 case AArch64::STRDui:
279 case AArch64::LDPXi:
280 case AArch64::LDRXui:
281 case AArch64::LDPDi:
282 case AArch64::LDRDui:
283 Scale = 8;
284 break;
285 case AArch64::STPQi:
286 case AArch64::STRQui:
287 case AArch64::LDPQi:
288 case AArch64::LDRQui:
289 Scale = 16;
290 break;
291 default:
292 llvm_unreachable("Unexpected callee-save save/restore opcode!");
293 }
294
295 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
296 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
297 "Unexpected base register in callee-save save/restore instruction!");
298 // Last operand is immediate offset that needs fixing.
299 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
300 // All generated opcodes have scaled offsets.
301 assert(LocalStackSize % Scale == 0);
302 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
303
304 if (NeedsWinCFI) {
305 HasWinCFI = true;
306 auto MBBI = std::next(MachineBasicBlock::iterator(MI));
307 assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
308 assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
309 "Expecting a SEH instruction");
310 fixupSEHOpcode(MBBI, LocalStackSize);
311 }
312}
313
315 uint64_t StackBumpBytes) const {
316 if (AFL.homogeneousPrologEpilog(MF))
317 return false;
318
319 if (AFI->getLocalStackSize() == 0)
320 return false;
321
322 // For WinCFI, if optimizing for size, prefer to not combine the stack bump
323 // (to force a stp with predecrement) to match the packed unwind format,
324 // provided that there actually are any callee saved registers to merge the
325 // decrement with.
326 // This is potentially marginally slower, but allows using the packed
327 // unwind format for functions that both have a local area and callee saved
328 // registers. Using the packed unwind format notably reduces the size of
329 // the unwind info.
330 if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
331 MF.getFunction().hasOptSize())
332 return false;
333
334 // 512 is the maximum immediate for stp/ldp that will be used for
335 // callee-save save/restores
336 if (StackBumpBytes >= 512 ||
337 AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
338 return false;
339
340 if (MFI.hasVarSizedObjects())
341 return false;
342
343 if (RegInfo.hasStackRealignment(MF))
344 return false;
345
346 // This isn't strictly necessary, but it simplifies things a bit since the
347 // current RedZone handling code assumes the SP is adjusted by the
348 // callee-save save/restore code.
349 if (AFL.canUseRedZone(MF))
350 return false;
351
352 // When there is an SVE area on the stack, always allocate the
353 // callee-saves and spills/locals separately.
354 if (AFI->hasSVEStackSize())
355 return false;
356
357 return true;
358}
359
361 StackOffset PPRCalleeSavesSize =
362 StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
363 StackOffset ZPRCalleeSavesSize =
364 StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
365 StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
366 StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
368 return {{PPRCalleeSavesSize, PPRLocalsSize},
369 {ZPRCalleeSavesSize, ZPRLocalsSize}};
370 // For simplicity, attribute all locals to ZPRs when split SVE is disabled.
371 return {{PPRCalleeSavesSize, StackOffset{}},
372 {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
373}
374
376 SVEFrameSizes const &SVE) {
377 StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
378 StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
379 StackOffset AfterPPRs = {};
381 BeforePPRs = SVE.PPR.CalleeSavesSize;
382 // If there are no ZPR CSRs, place all local allocations after the ZPRs.
383 if (SVE.ZPR.CalleeSavesSize)
384 AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
385 else
386 AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
387 }
388 return {BeforePPRs, AfterPPRs, AfterZPRs};
389}
390
396
399 StackOffset PPRCalleeSavesSize,
400 StackOffset ZPRCalleeSavesSize,
401 bool IsEpilogue) {
404 IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
405 auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
406 // Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
407 if (PPRCalleeSavesSize) {
408 PPRsI = AdjustI(PPRsI);
409 assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
410 while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
411 IsEpilogue ? (--PPRsI) : (++PPRsI);
412 }
413 MachineBasicBlock::iterator ZPRsI = PPRsI;
414 if (ZPRCalleeSavesSize) {
415 ZPRsI = AdjustI(ZPRsI);
416 assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
417 while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
418 IsEpilogue ? (--ZPRsI) : (++ZPRsI);
419 }
420 if (IsEpilogue)
421 return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
422 return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
423}
424
429 EmitCFI = AFI->needsDwarfUnwindInfo(MF);
430 EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
431 IsFunclet = MBB.isEHFuncletEntry();
432 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
433
434#ifndef NDEBUG
435 collectBlockLiveins();
436#endif
437}
438
439#ifndef NDEBUG
440/// Collect live registers from the end of \p MI's parent up to (including) \p
441/// MI in \p LiveRegs.
444
445 MachineBasicBlock &MBB = *MI.getParent();
446 LiveRegs.addLiveOuts(MBB);
447 for (const MachineInstr &MI :
448 reverse(make_range(MI.getIterator(), MBB.instr_end())))
449 LiveRegs.stepBackward(MI);
450}
451
452void AArch64PrologueEmitter::collectBlockLiveins() {
453 // Collect live register from the end of MBB up to the start of the existing
454 // frame setup instructions.
455 PrologueEndI = MBB.begin();
456 while (PrologueEndI != MBB.end() &&
457 PrologueEndI->getFlag(MachineInstr::FrameSetup))
458 ++PrologueEndI;
459
460 if (PrologueEndI != MBB.end()) {
461 getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
462 // Ignore registers used for stack management for now.
463 LiveRegs.removeReg(AArch64::SP);
464 LiveRegs.removeReg(AArch64::X19);
465 LiveRegs.removeReg(AArch64::FP);
466 LiveRegs.removeReg(AArch64::LR);
467
468 // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
469 // This is necessary to spill VG if required where SVE is unavailable, but
470 // X0 is preserved around this call.
471 if (requiresGetVGCall())
472 LiveRegs.removeReg(AArch64::X0);
473 }
474}
475
476void AArch64PrologueEmitter::verifyPrologueClobbers() const {
477 if (PrologueEndI == MBB.end())
478 return;
479 // Check if any of the newly instructions clobber any of the live registers.
480 for (MachineInstr &MI :
481 make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
482 for (auto &Op : MI.operands())
483 if (Op.isReg() && Op.isDef())
484 assert(!LiveRegs.contains(Op.getReg()) &&
485 "live register clobbered by inserted prologue instructions");
486 }
487}
488#endif
489
490void AArch64PrologueEmitter::determineLocalsStackSize(
491 uint64_t StackSize, uint64_t PrologueSaveSize) {
492 AFI->setLocalStackSize(StackSize - PrologueSaveSize);
493 CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
494}
495
496// Return the maximum possible number of bytes for `Size` due to the
497// architectural limit on the size of a SVE register.
498static int64_t upperBound(StackOffset Size) {
499 static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
500 return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
501}
502
503void AArch64PrologueEmitter::allocateStackSpace(
504 MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
505 StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
506 bool FollowupAllocs) {
507
508 if (!AllocSize)
509 return;
510
511 DebugLoc DL;
512 const int64_t MaxAlign = MFI.getMaxAlign().value();
513 const uint64_t AndMask = ~(MaxAlign - 1);
514
516 Register TargetReg = RealignmentPadding
517 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
518 : AArch64::SP;
519 // SUB Xd/SP, SP, AllocSize
520 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
522 EmitCFI, InitialOffset);
523
524 if (RealignmentPadding) {
525 // AND SP, X9, 0b11111...0000
526 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
527 .addReg(TargetReg, RegState::Kill)
530 AFI->setStackRealigned(true);
531
532 // No need for SEH instructions here; if we're realigning the stack,
533 // we've set a frame pointer and already finished the SEH prologue.
535 }
536 return;
537 }
538
539 //
540 // Stack probing allocation.
541 //
542
543 // Fixed length allocation. If we don't need to re-align the stack and don't
544 // have SVE objects, we can use a more efficient sequence for stack probing.
545 if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
546 Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
547 assert(ScratchReg != AArch64::NoRegister);
548 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
549 .addDef(ScratchReg)
550 .addImm(AllocSize.getFixed())
551 .addImm(InitialOffset.getFixed())
552 .addImm(InitialOffset.getScalable());
553 // The fixed allocation may leave unprobed bytes at the top of the
554 // stack. If we have subsequent allocation (e.g. if we have variable-sized
555 // objects), we need to issue an extra probe, so these allocations start in
556 // a known state.
557 if (FollowupAllocs) {
558 // STR XZR, [SP]
559 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
560 .addReg(AArch64::XZR)
561 .addReg(AArch64::SP)
562 .addImm(0)
564 }
565
566 return;
567 }
568
569 // Variable length allocation.
570
571 // If the (unknown) allocation size cannot exceed the probe size, decrement
572 // the stack pointer right away.
573 int64_t ProbeSize = AFI->getStackProbeSize();
574 if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
575 Register ScratchReg = RealignmentPadding
576 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
577 : AArch64::SP;
578 assert(ScratchReg != AArch64::NoRegister);
579 // SUB Xd, SP, AllocSize
580 emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
582 EmitCFI, InitialOffset);
583 if (RealignmentPadding) {
584 // AND SP, Xn, 0b11111...0000
585 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
586 .addReg(ScratchReg, RegState::Kill)
589 AFI->setStackRealigned(true);
590 }
591 if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
593 // STR XZR, [SP]
594 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
595 .addReg(AArch64::XZR)
596 .addReg(AArch64::SP)
597 .addImm(0)
599 }
600 return;
601 }
602
603 // Emit a variable-length allocation probing loop.
604 // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
605 // each of them guaranteed to adjust the stack by less than the probe size.
606 Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
607 assert(TargetReg != AArch64::NoRegister);
608 // SUB Xd, SP, AllocSize
609 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
611 EmitCFI, InitialOffset);
612 if (RealignmentPadding) {
613 // AND Xn, Xn, 0b11111...0000
614 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
615 .addReg(TargetReg, RegState::Kill)
618 }
619
620 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
621 .addReg(TargetReg);
622 if (EmitCFI) {
623 // Set the CFA register back to SP.
624 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
625 .buildDefCFARegister(AArch64::SP);
626 }
627 if (RealignmentPadding)
628 AFI->setStackRealigned(true);
629}
630
632 const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
633 const MachineBasicBlock::iterator EndI = MBB.end();
634
635 // At this point, we're going to decide whether or not the function uses a
636 // redzone. In most cases, the function doesn't have a redzone so let's
637 // assume that's false and set it to true in the case that there's a redzone.
638 AFI->setHasRedZone(false);
639
640 // Debug location must be unknown since the first debug location is used
641 // to determine the end of the prologue.
642 DebugLoc DL;
643
644 // In some cases, particularly with CallingConv::SwiftTail, it is possible to
645 // have a tail-call where the caller only needs to adjust the stack pointer in
646 // the epilogue. In this case, we still need to emit a SEH prologue sequence.
647 // See `seh-minimal-prologue-epilogue.ll` test cases.
648 if (AFI->getArgumentStackToRestore())
649 HasWinCFI = true;
650
651 if (AFI->shouldSignReturnAddress(MF)) {
652 // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
653 // are inserted by emitPacRetPlusLeafHardening().
654 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
655 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
657 }
658 // AArch64PointerAuth pass will insert SEH_PACSignLR
660 }
661
662 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
663 emitShadowCallStackPrologue(PrologueBeginI, DL);
665 }
666
667 if (EmitCFI && AFI->isMTETagged())
668 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
670
671 // We signal the presence of a Swift extended frame to external tools by
672 // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
673 // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
674 // bits so that is still true.
675 if (HasFP && AFI->hasSwiftAsyncContext())
676 emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
677
678 // All calls are tail calls in GHC calling conv, and functions have no
679 // prologue/epilogue.
680 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
681 return;
682
683 // Set tagged base pointer to the requested stack slot. Ideally it should
684 // match SP value after prologue.
685 if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
686 AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
687 else
688 AFI->setTaggedBasePointerOffset(MFI.getStackSize());
689
690 // getStackSize() includes all the locals in its size calculation. We don't
691 // include these locals when computing the stack size of a funclet, as they
692 // are allocated in the parent's stack frame and accessed via the frame
693 // pointer from the funclet. We only save the callee saved registers in the
694 // funclet, which are really the callee saved registers of the parent
695 // function, including the funclet.
696 int64_t NumBytes =
697 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
698 if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
699 return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
700
701 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
702 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
703
704 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
705 // All of the remaining stack allocations are for locals.
706 determineLocalsStackSize(NumBytes, PrologueSaveSize);
707
708 auto [PPR, ZPR] = getSVEStackFrameSizes();
709 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
710
711 MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
713 assert(!SVEAllocs.AfterPPRs &&
714 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
715 // If we're doing SVE saves first, we need to immediately allocate space
716 // for fixed objects, then space for the SVE callee saves.
717 //
718 // Windows unwind requires that the scalable size is a multiple of 16;
719 // that's handled when the callee-saved size is computed.
720 auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
721 allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
722 /*FollowupAllocs=*/true);
723 NumBytes -= FixedObject;
724
725 // Now allocate space for the GPR callee saves.
726 MachineBasicBlock::iterator MBBI = PrologueBeginI;
727 while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
728 ++MBBI;
730 MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
731 NumBytes -= AFI->getCalleeSavedStackSize();
732 } else if (CombineSPBump) {
733 assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
734 emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
735 StackOffset::getFixed(-NumBytes), TII,
737 EmitAsyncCFI);
738 NumBytes = 0;
739 } else if (HomPrologEpilog) {
740 // Stack has been already adjusted.
741 NumBytes -= PrologueSaveSize;
742 } else if (PrologueSaveSize != 0) {
744 PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
745 NumBytes -= PrologueSaveSize;
746 }
747 assert(NumBytes >= 0 && "Negative stack allocation size!?");
748
749 // Move past the saves of the callee-saved registers, fixing up the offsets
750 // and pre-inc if we decided to combine the callee-save and local stack
751 // pointer bump above.
752 auto &TLI = *Subtarget.getTargetLowering();
753
754 MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
755 while (AfterGPRSavesI != EndI &&
756 AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
757 !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
758 if (CombineSPBump &&
759 // Only fix-up frame-setup load/store instructions.
760 (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
761 fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
762 AFI->getLocalStackSize());
763 ++AfterGPRSavesI;
764 }
765
766 // For funclets the FP belongs to the containing function. Only set up FP if
767 // we actually need to.
768 if (!IsFunclet && HasFP)
769 emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
770
771 // Now emit the moves for whatever callee saved regs we have (including FP,
772 // LR if those are saved). Frame instructions for SVE registers are emitted
773 // later, after the instructions which actually save the SVE regs.
774 if (EmitAsyncCFI)
775 emitCalleeSavedGPRLocations(AfterGPRSavesI);
776
777 // Alignment is required for the parent frame, not the funclet
778 const bool NeedsRealignment =
779 NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
780 const int64_t RealignmentPadding =
781 (NeedsRealignment && MFI.getMaxAlign() > Align(16))
782 ? MFI.getMaxAlign().value() - 16
783 : 0;
784
785 if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
786 emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
787
788 StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
789 SVEAllocs.AfterZPRs += NonSVELocalsSize;
790
791 StackOffset CFAOffset =
792 StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
793 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
794 // Allocate space for the callee saves and PPR locals (if any).
796 auto [PPRRange, ZPRRange] =
797 partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
798 ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
799 AfterSVESavesI = ZPRRange.End;
800 if (EmitAsyncCFI)
801 emitCalleeSavedSVELocations(AfterSVESavesI);
802
803 allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
804 EmitAsyncCFI && !HasFP, CFAOffset,
805 MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
806 SVEAllocs.AfterZPRs);
807 CFAOffset += SVEAllocs.BeforePPRs;
808 assert(PPRRange.End == ZPRRange.Begin &&
809 "Expected ZPR callee saves after PPR locals");
810 allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
811 EmitAsyncCFI && !HasFP, CFAOffset,
812 MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
813 CFAOffset += SVEAllocs.AfterPPRs;
814 } else {
816 // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
817 // already been allocated. PPR locals (included in AfterPPRs) are not
818 // supported (note: this is asserted above).
819 CFAOffset += SVEAllocs.BeforePPRs;
820 }
821
822 // Allocate space for the rest of the frame including ZPR locals. Align the
823 // stack as necessary.
824 assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
825 "Cannot use redzone with stack realignment");
826 if (!AFL.canUseRedZone(MF)) {
827 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
828 // correct value here, as NumBytes also includes padding bytes, which
829 // shouldn't be counted here.
830 allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
831 EmitAsyncCFI && !HasFP, CFAOffset,
832 MFI.hasVarSizedObjects());
833 }
834
835 // If we need a base pointer, set it up here. It's whatever the value of the
836 // stack pointer is at this point. Any variable size objects will be
837 // allocated after this, so we can still use the base pointer to reference
838 // locals.
839 //
840 // FIXME: Clarify FrameSetup flags here.
841 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
842 // needed.
843 // For funclets the BP belongs to the containing function.
844 if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
845 TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
846 AArch64::SP, false);
847 if (NeedsWinCFI) {
848 HasWinCFI = true;
849 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
851 }
852 }
853
854 // The very last FrameSetup instruction indicates the end of prologue. Emit a
855 // SEH opcode indicating the prologue end.
856 if (NeedsWinCFI && HasWinCFI) {
857 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
859 }
860
861 // SEH funclets are passed the frame pointer in X1. If the parent
862 // function uses the base register, then the base register is used
863 // directly, and is not retrieved from X1.
864 if (IsFunclet && F.hasPersonalityFn()) {
865 EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
867 BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
868 AArch64::FP)
869 .addReg(AArch64::X1)
871 MBB.addLiveIn(AArch64::X1);
872 }
873 }
874
875 if (EmitCFI && !EmitAsyncCFI) {
876 if (HasFP) {
877 emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
878 } else {
879 StackOffset TotalSize =
880 AFL.getSVEStackSize(MF) +
881 StackOffset::getFixed((int64_t)MFI.getStackSize());
882 CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
883 CFIBuilder.insertCFIInst(
884 createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
885 TotalSize, /*LastAdjustmentWasScalable=*/false));
886 }
887 emitCalleeSavedGPRLocations(AfterSVESavesI);
888 emitCalleeSavedSVELocations(AfterSVESavesI);
889 }
890}
891
892void AArch64PrologueEmitter::emitShadowCallStackPrologue(
894 // Shadow call stack prolog: str x30, [x18], #8
895 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
896 .addReg(AArch64::X18, RegState::Define)
897 .addReg(AArch64::LR)
898 .addReg(AArch64::X18)
899 .addImm(8)
901
902 // This instruction also makes x18 live-in to the entry block.
903 MBB.addLiveIn(AArch64::X18);
904
905 if (NeedsWinCFI)
906 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
908
909 if (EmitCFI) {
910 // Emit a CFI instruction that causes 8 to be subtracted from the value of
911 // x18 when unwinding past this frame.
912 static const char CFIInst[] = {
913 dwarf::DW_CFA_val_expression,
914 18, // register
915 2, // length
916 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
917 static_cast<char>(-8) & 0x7f, // addend (sleb128)
918 };
919 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
920 .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
921 }
922}
923
// Prologue side of Swift async frame-pointer tagging. Depending on the
// target option SwiftAsyncFramePointer this either ORs FP with a value loaded
// through the GOT for the symbol "swift_async_extendedFramePointerFlags"
// (only when the subtarget reports the context as dynamically set), or ORs a
// hard-coded immediate into FP, or emits nothing. When NeedsWinCFI is set,
// the emitted instructions are followed by an SEH_Nop and HasWinCFI is set.
924void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
926 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
928 if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
929 // The special symbol below is absolute and has a *value* that can be
930 // combined with the frame pointer to signal an extended frame.
931 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
932 .addExternalSymbol("swift_async_extendedFramePointerFlags",
934 if (NeedsWinCFI) {
935 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
937 HasWinCFI = true;
938 }
 // FP |= x16. The last operand is 32 on ILP32 targets and 0 otherwise
 // (presumably the shift applied to x16 -- confirm against ORRXrs).
939 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
940 .addUse(AArch64::FP)
941 .addUse(AArch64::X16)
942 .addImm(Subtarget.isTargetILP32() ? 32 : 0);
943 if (NeedsWinCFI) {
944 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
946 HasWinCFI = true;
947 }
948 break;
949 }
 // Context is not dynamically set: use the hard-coded tag below.
950 [[fallthrough]];
951
953 // ORR x29, x29, #0x1000_0000_0000_0000
 // 0x1100 is the immediate operand passed to ORRXri for the value named in
 // the comment above (presumably its logical-immediate encoding).
954 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
955 .addUse(AArch64::FP)
956 .addImm(0x1100)
958 if (NeedsWinCFI) {
959 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
961 HasWinCFI = true;
962 }
963 break;
964
966 break;
967 }
968}
969
// Emits the prologue for a function that has no frame pointer and no SVE
// stack (both are asserted). NumBytes is recorded as the local stack size.
//  - NumBytes == 0: nothing is allocated; only an SEH_PrologEnd is emitted,
//    and only if WinCFI is needed and SEH opcodes were already emitted.
//  - Red zone usable: no SP adjustment is emitted; the function is marked as
//    using the red zone and the NumRedZoneFunctions statistic is bumped.
//  - Otherwise: SP is decremented by NumBytes and, when EmitCFI is set, a
//    def_cfa_offset of NumBytes is emitted.
// In the last two cases an SEH_PrologEnd is emitted when NeedsWinCFI is set.
970void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
971 int64_t NumBytes, MachineBasicBlock::iterator MBBI,
972 const DebugLoc &DL) const {
973 assert(!HasFP && "unexpected function without stack frame but with FP");
974 assert(!AFL.getSVEStackSize(MF) &&
975 "unexpected function without stack frame but with SVE objects");
976 // All of the stack allocation is for locals.
977 AFI->setLocalStackSize(NumBytes);
978 if (!NumBytes) {
979 if (NeedsWinCFI && HasWinCFI) {
980 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
982 }
983 return;
984 }
985 // REDZONE: If the stack size is less than 128 bytes, we don't need
986 // to actually allocate.
987 if (AFL.canUseRedZone(MF)) {
988 AFI->setHasRedZone(true);
989 ++NumRedZoneFunctions;
990 } else {
991 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
992 StackOffset::getFixed(-NumBytes), TII,
994 if (EmitCFI) {
995 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
996 MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
997 // Encode the stack size of the leaf function.
998 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
999 .buildDefCFAOffset(NumBytes, FrameLabel);
1000 }
1001 }
1002
1003 if (NeedsWinCFI) {
1004 HasWinCFI = true;
1005 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1007 }
1008}
1009
// Establishes the frame pointer at MBBI.
// FPOffset is the callee-save-base-to-frame-record offset, plus the local
// stack size when CombineSPBump is set. Steps, in order:
//  1. With a Swift async context, store the context (x22 if the function has
//     a SwiftAsync attribute anywhere, otherwise xzr) at SP + FPOffset - 8.
//  2. For a homogeneous prolog, append FPOffset as an immediate operand to
//     the preceding HOM_Prolog; otherwise emit FP = SP + FPOffset.
//  3. When EmitAsyncCFI is set, define the CFA in terms of FP.
// FixedObject is only forwarded to emitDefineCFAWithFP.
1010void AArch64PrologueEmitter::emitFramePointerSetup(
1012 unsigned FixedObject) {
1013 int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
1014 if (CombineSPBump)
1015 FPOffset += AFI->getLocalStackSize();
1016
1017 if (AFI->hasSwiftAsyncContext()) {
1018 // Before we update the live FP we have to ensure there's a valid (or
1019 // null) asynchronous context in its slot just before FP in the frame
1020 // record, so store it now.
1021 const auto &Attrs = MF.getFunction().getAttributes();
1022 bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1023 if (HaveInitialContext)
1024 MBB.addLiveIn(AArch64::X22);
 // Without an incoming context, XZR is stored instead.
1025 Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1026 BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
1027 .addUse(Reg)
1028 .addUse(AArch64::SP)
1029 .addImm(FPOffset - 8)
1031 if (NeedsWinCFI) {
1032 // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
1033 // to multiple instructions, should be mutually-exclusive.
1034 assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
1035 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1037 HasWinCFI = true;
1038 }
1039 }
1040
1041 if (HomPrologEpilog) {
 // The instruction immediately before MBBI must be the HOM_Prolog pseudo;
 // FPOffset is attached to it as an extra immediate operand.
1042 auto Prolog = MBBI;
1043 --Prolog;
1044 assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1045 Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1046 } else {
1047 // Issue sub fp, sp, FPOffset or
1048 // mov fp,sp when FPOffset is zero.
1049 // Note: All stores of callee-saved registers are marked as "FrameSetup".
1050 // This code marks the instruction(s) that set the FP also.
1051 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1052 StackOffset::getFixed(FPOffset), TII,
1054 if (NeedsWinCFI && HasWinCFI) {
1055 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1057 // After setting up the FP, the rest of the prolog doesn't need to be
1058 // included in the SEH unwind info.
1059 NeedsWinCFI = false;
1060 }
1061 }
1062 if (EmitAsyncCFI)
1063 emitDefineCFAWithFP(MBBI, FixedObject);
1064}
1065
1066// Define the current CFA rule to use the provided FP.
// The CFA is expressed relative to the frame register reported by RegInfo.
// Its offset is FixedObject minus OffsetToFirstCalleeSaveFromFP, where the
// latter is the callee-save-base-to-frame-record offset minus the total
// callee-saved stack size. The instruction is inserted at MBBI and built
// with the FrameSetup flag.
1067void AArch64PrologueEmitter::emitDefineCFAWithFP(
1068 MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
1069 const int OffsetToFirstCalleeSaveFromFP =
1070 AFI->getCalleeSaveBaseToFrameRecordOffset() -
1071 AFI->getCalleeSavedStackSize();
1072 Register FramePtr = RegInfo.getFrameRegister(MF);
1073 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
1074 .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
1075}
1076
// Emits the Windows stack-probe allocation sequence at MBBI.
//
// Outline of what is emitted:
//  1. If x15 (or a super/sub-register of it) is live into MBB, copy it to a
//     scratch register first and copy it back at the very end.
//  2. Load (NumBytes + RealignmentPadding) >> 4 into x15.
//  3. Call the routine named by Subtarget.getChkStkName(): directly for the
//     tiny/small/medium/kernel code models, or through an address
//     materialized in x16 for the large code model.
//  4. Subtract x15 from SP (SUBXrx64) and, under WinCFI, record an
//     SEH_StackAlloc of NumBytes.
//  5. If RealignmentPadding > 0, compute SP + RealignmentPadding into x15,
//     AND it with the MaxAlign mask into SP, and mark the stack as realigned.
//
// NumBytes is an in/out parameter: it is reset to 0 once the allocation has
// been emitted. Reports a fatal error if the function has SVE callee saves,
// or, under WinCFI, if NumBytes is 256MB or more.
1077void AArch64PrologueEmitter::emitWindowsStackProbe(
1078 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
1079 int64_t RealignmentPadding) const {
1080 if (AFI->getSVECalleeSavedStackSize())
1081 report_fatal_error("SVE callee saves not yet supported with stack probing");
1082
1083 // Find an available register to spill the value of X15 to, if X15 is being
1084 // used already for nest.
1085 unsigned X15Scratch = AArch64::NoRegister;
1086 if (llvm::any_of(MBB.liveins(),
1087 [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
1088 return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
1089 LiveIn.PhysReg);
1090 })) {
1091 X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
 // The scratch register must exist and must not be x15, x16 or x17, which
 // the probe sequence below uses itself.
1092 assert(X15Scratch != AArch64::NoRegister &&
1093 (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
1094#ifndef NDEBUG
1095 LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
1096#endif
 // ORR scratch, xzr, x15: copies x15 into the scratch register.
1097 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
1098 .addReg(AArch64::XZR)
1099 .addReg(AArch64::X15, RegState::Undef)
1100 .addReg(AArch64::X15, RegState::Implicit)
1102 }
1103
 // The allocation size is passed in x15 in units of 16 bytes.
1104 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
1105 if (NeedsWinCFI) {
1106 HasWinCFI = true;
1107 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
1108 // exceed this amount. We need to move at most 2^24 - 1 into x15.
1109 // This is at most two instructions, MOVZ followed by MOVK.
1110 // TODO: Fix to use multiple stack alloc unwind codes for stacks
1111 // exceeding 256MB in size.
1112 if (NumBytes >= (1 << 28))
1113 report_fatal_error("Stack size cannot exceed 256MB for stack "
1114 "unwinding purposes");
1115
 // Low 16 bits go in with MOVZ; the next 16 bits, if non-zero, with MOVK.
 // Each move is followed by an SEH_Nop.
1116 uint32_t LowNumWords = NumWords & 0xFFFF;
1117 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
1118 .addImm(LowNumWords)
1121 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1123 if ((NumWords & 0xFFFF0000) != 0) {
1124 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
1125 .addReg(AArch64::X15)
1126 .addImm((NumWords & 0xFFFF0000) >> 16) // High half
1129 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1131 }
1132 } else {
 // Without WinCFI the MOVi64imm pseudo is used for the whole value.
1133 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
1134 .addImm(NumWords)
1136 }
1137
1138 const char *ChkStk = Subtarget.getChkStkName();
1139 switch (MF.getTarget().getCodeModel()) {
1140 case CodeModel::Tiny:
1141 case CodeModel::Small:
1142 case CodeModel::Medium:
1143 case CodeModel::Kernel:
 // Direct call to the probe routine; x15 is an implicit use, and x16, x17
 // and NZCV are listed as further implicit operands.
1144 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1145 .addExternalSymbol(ChkStk)
1146 .addReg(AArch64::X15, RegState::Implicit)
1147 .addReg(AArch64::X16,
1149 .addReg(AArch64::X17,
1151 .addReg(AArch64::NZCV,
1154 if (NeedsWinCFI) {
1155 HasWinCFI = true;
1156 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1158 }
1159 break;
1160 case CodeModel::Large:
 // Materialize the routine's address in x16 first; the instruction that
 // follows consumes (kills) x16.
1161 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1162 .addReg(AArch64::X16, RegState::Define)
1163 .addExternalSymbol(ChkStk)
1164 .addExternalSymbol(ChkStk)
1166 if (NeedsWinCFI) {
1167 HasWinCFI = true;
1168 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1170 }
1171
1173 .addReg(AArch64::X16, RegState::Kill)
1175 .addReg(AArch64::X16,
1177 .addReg(AArch64::X17,
1179 .addReg(AArch64::NZCV,
1182 if (NeedsWinCFI) {
1183 HasWinCFI = true;
1184 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1186 }
1187 break;
1188 }
1189
 // Perform the actual allocation: SP = SP - x15 (extended-register form).
1190 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1191 .addReg(AArch64::SP, RegState::Kill)
1192 .addReg(AArch64::X15, RegState::Kill)
1195 if (NeedsWinCFI) {
1196 HasWinCFI = true;
1197 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1198 .addImm(NumBytes)
1200 }
 // The whole allocation has been emitted; tell the caller nothing is left.
1201 NumBytes = 0;
1202
1203 if (RealignmentPadding > 0) {
 // Padding of 4096 or more is first materialized in x16; smaller padding
 // is added to SP as an immediate. Either way the sum lands in x15.
1204 if (RealignmentPadding >= 4096) {
1205 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
1206 .addReg(AArch64::X16, RegState::Define)
1207 .addImm(RealignmentPadding)
1209 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
1210 .addReg(AArch64::SP)
1211 .addReg(AArch64::X16, RegState::Kill)
1214 } else {
1215 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
1216 .addReg(AArch64::SP)
1217 .addImm(RealignmentPadding)
1218 .addImm(0)
1220 }
1221
 // Mask that clears the bits below the function's maximum alignment.
1222 uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
1223 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1224 .addReg(AArch64::X15, RegState::Kill)
1226 AFI->setStackRealigned(true);
1227
1228 // No need for SEH instructions here; if we're realigning the stack,
1229 // we've set a frame pointer and already finished the SEH prologue.
1231 }
1232 if (X15Scratch != AArch64::NoRegister) {
 // ORR x15, xzr, scratch: restores the x15 value saved at the top.
1233 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
1234 .addReg(AArch64::XZR)
1235 .addReg(X15Scratch, RegState::Undef)
1236 .addReg(X15Scratch, RegState::Implicit)
1238 }
1239}
1240
// Emits one CFI offset directive for every callee-saved register whose spill
// slot does NOT have a scalable stack ID. Entries in scalable slots are
// skipped here. The recorded offset is the slot's object offset minus
// AFL.getOffsetOfLocalArea(). Does nothing when there are no callee saves.
1241void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
1243 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1244 if (CSI.empty())
1245 return;
1246
1247 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1248 for (const auto &Info : CSI) {
1249 unsigned FrameIdx = Info.getFrameIdx();
1250 if (MFI.hasScalableStackID(FrameIdx))
1251 continue;
1252
1253 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1254 int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
1255 CFIBuilder.buildOffset(Info.getReg(), Offset);
1256 }
1257}
1258
// Emits CFI for callee-saved registers that live in scalable stack slots.
// Entries in non-scalable slots are skipped, as are registers for which
// RegInfo.regNeedsCFI() returns false. Each offset is the slot's scalable
// object offset minus the fixed callee-saved stack size, further reduced by
// the PPR stack size for ScalableVector slots (see the comment in the loop).
// When the function requires saving VG, the offset of the incoming-VG slot
// is passed along to createCFAOffset.
1259void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
1261 // Add callee saved registers to move list.
1262 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1263 if (CSI.empty())
1264 return;
1265
1266 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1267
1268 std::optional<int64_t> IncomingVGOffsetFromDefCFA;
1269 if (AFL.requiresSaveVG(MF)) {
 // CSI is searched in reverse, so this is the last VG entry in the list.
1270 auto IncomingVG = *find_if(
1271 reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
1272 IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
1273 AFL.getOffsetOfLocalArea();
1274 }
1275
1276 StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1277 for (const auto &Info : CSI) {
1278 int FI = Info.getFrameIdx();
1279 if (!MFI.hasScalableStackID(FI))
1280 continue;
1281
1282 // Not all unwinders may know about SVE registers, so assume the lowest
1283 // common denominator.
1284 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1285 MCRegister Reg = Info.getReg();
1286 if (!RegInfo.regNeedsCFI(Reg, Reg))
1287 continue;
1288
1289 StackOffset Offset =
1290 StackOffset::getScalable(MFI.getObjectOffset(FI)) -
1291 StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
1292
1293 // The scalable vectors are below (lower address) the scalable predicates
1294 // with split SVE objects, so we must subtract the size of the predicates.
1296 MFI.getStackID(FI) == TargetStackID::ScalableVector)
1297 Offset -= PPRStackSize;
1298
1299 CFIBuilder.insertCFIInst(
1300 createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
1301 }
1302}
1303
// Returns true when MI's opcode is CATCHRET or CLEANUPRET, and false for
// every other opcode.
1305 switch (MI.getOpcode()) {
1306 default:
1307 return false;
1308 case AArch64::CATCHRET:
1309 case AArch64::CLEANUPRET:
1310 return true;
1311 }
1312}
1313
1318 EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
1319 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
1320 SEHEpilogueStartI = MBB.end();
1321}
1322
// Sets SP to FP + Offset, where Offset must have a strictly negative scalable
// part and a non-positive fixed part (asserted). When there is a fixed part,
// it is applied first into a fresh virtual register, and the scalable part is
// then applied from that register into SP, so SP is written only once.
1323void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
1325 // Other combinations could be supported, but are not currently needed.
1326 assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
1327 "expected negative offset (with optional fixed portion)");
1328 Register Base = AArch64::FP;
1329 if (int64_t FixedOffset = Offset.getFixed()) {
1330 // If we have a negative fixed offset, we need to first subtract it in a
1331 // temporary register (to avoid briefly deallocating the scalable
1332 // portion of the offset).
1333 Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1334 emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
1335 StackOffset::getFixed(FixedOffset), TII,
1337 }
1338 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
1339 StackOffset::getScalable(Offset.getScalable()), TII,
1341}
1342
// Epilogue emission for MBB. Phases, in the order they appear below:
//  1. Take the debug location and funclet status from the last non-debug
//     instruction; return immediately for the GHC calling convention.
//  2. Compute the sizes involved (total bytes, prologue save size, argument
//     stack to restore, fixed object size).
//  3. Homogeneous epilog: only adjust the local stack, then return.
//  4. When the SP bump is not combined, try to fold the callee-save pop into
//     the last restore as a post-increment; otherwise defer it to phase 9.
//  5. Walk back over the FrameDestroy instructions to find the first GPR
//     restore, fixing up restore offsets when the SP bump is combined.
//  6. Under WinCFI, insert SEH_EpilogStart and remember its position.
//  7. Partition the SVE callee-save restores and untag FP for Swift async.
//  8. Combined SP bump: emit a single SP update and return.
//  9. Otherwise deallocate SVE areas, locals, and finally any argument stack,
//     emitting CFI along the way when EmitCFI is set.
1344 MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
1345 if (MBB.end() != EpilogueEndI) {
1346 DL = EpilogueEndI->getDebugLoc();
1347 IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
1348 }
1349
1350 int64_t NumBytes =
1351 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
1352
1353 // All calls are tail calls in GHC calling conv, and functions have no
1354 // prologue/epilogue.
1355 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1356 return;
1357
1358 // How much of the stack used by incoming arguments this function is expected
1359 // to restore in this particular epilogue.
1360 int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
1361 bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
1362 MF.getFunction().isVarArg());
1363 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1364
1365 int64_t AfterCSRPopSize = ArgumentStackToRestore;
1366 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1367 // We cannot rely on the local stack size set in emitPrologue if the function
1368 // has funclets, as funclets have different local stack size requirements, and
1369 // the current value set in emitPrologue may be that of the containing
1370 // function.
1371 if (MF.hasEHFunclets())
1372 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1373
1374 if (HomPrologEpilog) {
 // Insert before the HOM_Epilog pseudo if one directly precedes the first
 // terminator, otherwise before the first terminator itself.
1376 auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
1377 if (FirstHomogenousEpilogI != MBB.begin()) {
1378 auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
1379 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1380 FirstHomogenousEpilogI = HomogeneousEpilog;
1381 }
1382
1383 // Adjust local stack
1384 emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
1385 StackOffset::getFixed(AFI->getLocalStackSize()), TII,
1387
1388 // SP has been already adjusted while restoring callee save regs.
1389 // We've bailed-out the case with adjusting SP for arguments.
1390 assert(AfterCSRPopSize == 0);
1391 return;
1392 }
1393
1394 bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1395
1396 unsigned ProloguePopSize = PrologueSaveSize;
1398 // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1399 // that needs to be popped until we reach the start of the SVE save area.
1400 // The "FixedObject" stack occurs after the SVE area and must be popped
1401 // later.
1402 ProloguePopSize -= FixedObject;
1403 AfterCSRPopSize += FixedObject;
1404 }
1405
1406 // Assume we can't combine the last pop with the sp restore.
1407 if (!CombineSPBump && ProloguePopSize != 0) {
 // Step back from the first terminator to the last instruction that is not
 // a CFI or SEH pseudo (among the conditions visible here).
1408 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1409 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1410 AArch64InstrInfo::isSEHInstruction(*Pop) ||
1413 Pop = std::prev(Pop);
1414 // Converting the last ldp to a post-index ldp is valid only if the last
1415 // ldp's offset is 0.
1416 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1417 // If the offset is 0 and the AfterCSR pop is not actually trying to
1418 // allocate more stack for arguments (in space that an untimely interrupt
1419 // may clobber), convert it to a post-index ldp.
1420 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
1422 Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1423 ProloguePopSize);
1425 MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
1426 if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
1427 ++AfterLastPop;
1428 // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1429 // callee-save non-SVE registers to move the stack pointer to the start of
1430 // the SVE area.
1431 emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
1432 StackOffset::getFixed(ProloguePopSize), TII,
1434 &HasWinCFI);
1435 } else {
1436 // Otherwise, make sure to emit an add after the last ldp.
1437 // We're doing this by transferring the size to be restored from the
1438 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1439 // pops.
1440 AfterCSRPopSize += ProloguePopSize;
1441 }
1442 }
1443
1444 // Move past the restores of the callee-saved registers.
1445 // If we plan on combining the sp bump of the local stack size and the callee
1446 // save stack size, we might need to adjust the CSR save and restore offsets.
1447 MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
1448 MachineBasicBlock::iterator Begin = MBB.begin();
1449 while (FirstGPRRestoreI != Begin) {
1450 --FirstGPRRestoreI;
1451 if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
1453 isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
1454 ++FirstGPRRestoreI;
1455 break;
1456 } else if (CombineSPBump)
1457 fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
1458 AFI->getLocalStackSize());
1459 }
1460
1461 if (NeedsWinCFI) {
1462 // Note that there are cases where we insert SEH opcodes in the
1463 // epilogue when we had no SEH opcodes in the prologue. For
1464 // example, when there is no stack frame but there are stack
1465 // arguments. Insert the SEH_EpilogStart and remove it later if
1466 // we didn't emit any SEH opcodes to avoid generating WinCFI for
1467 // functions that don't need it.
1468 BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
 // Remember the SEH_EpilogStart just inserted (it sits right before
 // FirstGPRRestoreI).
1470 SEHEpilogueStartI = FirstGPRRestoreI;
1471 --SEHEpilogueStartI;
1472 }
1473
1474 // Determine the ranges of SVE callee-saves. This is done before emitting any
1475 // code at the end of the epilogue (for Swift async), which can get in the way
1476 // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1477 auto [PPR, ZPR] = getSVEStackFrameSizes();
1478 auto [PPRRange, ZPRRange] = partitionSVECS(
1479 MBB,
1481 ? MBB.getFirstTerminator()
1482 : FirstGPRRestoreI,
1483 PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1484
1485 if (HasFP && AFI->hasSwiftAsyncContext())
1486 emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
1487
1488 // If there is a single SP update, insert it before the ret and we're done.
1489 if (CombineSPBump) {
1490 assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
1491
1492 // When we are about to restore the CSRs, the CFA register is SP again.
1493 if (EmitCFI && HasFP)
1495 .buildDefCFA(AArch64::SP, NumBytes);
1496
1497 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1498 StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
1500 EmitCFI, StackOffset::getFixed(NumBytes));
1501 return;
1502 }
1503
 // From here on NumBytes counts only what lies below the prologue save area.
1504 NumBytes -= PrologueSaveSize;
1505 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1506
1507 StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
1508 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
1509
1510 // Deallocate the SVE area.
1512 assert(!SVEAllocs.AfterPPRs &&
1513 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
1514 // If the callee-save area is before FP, restoring the FP implicitly
1515 // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
1516 // explicitly.
1517 if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1518 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1520 false, NeedsWinCFI, &HasWinCFI);
1521 }
1522
1523 // Deallocate callee-save SVE registers.
1524 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1526 false, NeedsWinCFI, &HasWinCFI);
1527 } else if (AFI->hasSVEStackSize()) {
1528 // If we have stack realignment or variable-sized objects we must use the FP
1529 // to restore SVE callee saves (as there is an unknown amount of
1530 // data/padding between the SP and SVE CS area).
1531 Register BaseForSVEDealloc =
1532 (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1533 : AArch64::SP;
1534 if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
1535 if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1536 // The offset from the frame-pointer to the start of the ZPR saves.
1537 StackOffset FPOffsetZPR =
1538 -SVECalleeSavesSize - PPR.LocalsSize -
1539 StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
1540 // Deallocate the stack space by moving the SP to the start of the
1541 // ZPR/PPR callee-save area.
1542 moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1543 }
1544 // With split SVE, the predicates are stored in a separate area above the
1545 // ZPR saves, so we must adjust the stack to the start of the PPRs.
1546 if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1547 // The offset from the frame-pointer to the start of the PPR saves.
1548 StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1549 // Move to the start of the PPR area.
1550 assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1551 emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
1552 FPOffsetPPR, TII, MachineInstr::FrameDestroy);
1553 }
1554 } else if (BaseForSVEDealloc == AArch64::SP) {
 // CFAOffset tracks the CFA-to-SP distance; it starts at the full frame
 // size and shrinks after each deallocation step below.
1555 auto NonSVELocals = StackOffset::getFixed(NumBytes);
1556 auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
1557 SVEAllocs.totalSize();
1558
1559 if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
1560 // Deallocate non-SVE locals now. This is needed to reach the SVE callee
1561 // saves, but may also allow combining stack hazard bumps for split SVE.
1562 SVEAllocs.AfterZPRs += NonSVELocals;
1563 NumBytes -= NonSVELocals.getFixed();
1564 }
1565 // To deallocate the SVE stack adjust by the allocations in reverse.
1566 emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1568 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1569 CFAOffset);
1570 CFAOffset -= SVEAllocs.AfterZPRs;
1571 assert(PPRRange.Begin == ZPRRange.End &&
1572 "Expected PPR restores after ZPR");
1573 emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1575 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1576 CFAOffset);
1577 CFAOffset -= SVEAllocs.AfterPPRs;
1578 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1580 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1581 CFAOffset);
1582 }
1583
1584 if (EmitCFI)
1585 emitCalleeSavedSVERestores(
1586 SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
1587 }
1588
1589 if (!HasFP) {
1590 bool RedZone = AFL.canUseRedZone(MF);
1591 // If this was a redzone leaf function, we don't need to restore the
1592 // stack pointer (but we may need to pop stack args for fastcc).
1593 if (RedZone && AfterCSRPopSize == 0)
1594 return;
1595
1596 // Pop the local variables off the stack. If there are no callee-saved
1597 // registers, it means we are actually positioned at the terminator and can
1598 // combine stack increment for the locals and the stack increment for
1599 // callee-popped arguments into (possibly) a single instruction and be done.
1600 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1601 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1602 if (NoCalleeSaveRestore)
1603 StackRestoreBytes += AfterCSRPopSize;
1604
1606 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1607 StackOffset::getFixed(StackRestoreBytes), TII,
1609 StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
1610
1611 // If we were able to combine the local stack pop with the argument pop,
1612 // then we're done.
1613 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1614 return;
1615
1616 NumBytes = 0;
1617 }
1618
1619 // Restore the original stack pointer.
1620 // FIXME: Rather than doing the math here, we should instead just use
1621 // non-post-indexed loads for the restores if we aren't actually going to
1622 // be able to save any instructions.
1623 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1625 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
1626 StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
1628 } else if (NumBytes)
1629 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1630 StackOffset::getFixed(NumBytes), TII,
1632
1633 // When we are about to restore the CSRs, the CFA register is SP again.
1634 if (EmitCFI && HasFP)
1636 .buildDefCFA(AArch64::SP, PrologueSaveSize);
1637
1638 // This must be placed after the callee-save restore code because that code
1639 // assumes the SP is at the same location as it was after the callee-save save
1640 // code in the prologue.
1641 if (AfterCSRPopSize) {
1642 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1643 "interrupt may have clobbered");
1644
1646 MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1648 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1649 StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
1650 }
1651}
1652
// Decides whether the callee-save pop and the local-stack pop may be merged
// into one SP bump of StackBumpBytes in this epilogue.
// Returns false when the first check on StackBumpBytes fails, and true for
// an empty block. Otherwise it scans backwards from the first terminator,
// skipping transient instructions, until it reaches an instruction without
// the FrameDestroy flag (or the start of the block), and returns false if
// that instruction is one of the MTE tag-store opcodes listed below.
1653bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1654 uint64_t StackBumpBytes) const {
1656 StackBumpBytes))
1657 return false;
1658 if (MBB.empty())
1659 return true;
1660
1661 // Disable combined SP bump if the last instruction is an MTE tag store. It
1662 // is almost always better to merge SP adjustment into those instructions.
1663 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1664 MachineBasicBlock::iterator Begin = MBB.begin();
1665 while (LastI != Begin) {
1666 --LastI;
1667 if (LastI->isTransient())
1668 continue;
1669 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1670 break;
1671 }
1672 switch (LastI->getOpcode()) {
1673 case AArch64::STGloop:
1674 case AArch64::STZGloop:
1675 case AArch64::STGi:
1676 case AArch64::STZGi:
1677 case AArch64::ST2Gi:
1678 case AArch64::STZ2Gi:
1679 return false;
1680 default:
1681 return true;
1682 }
 // Both switch arms return, so control never reaches this point.
1683 llvm_unreachable("unreachable");
1684}
1685
// Epilogue side of Swift async frame-pointer tagging: clears the tag from FP
// with an AND against a hard-coded immediate. The first case of the switch
// deliberately falls through to the hard-coded path (see the comment there).
// Under WinCFI an SEH_Nop is emitted and HasWinCFI is set.
// NOTE(review): the AND is inserted before MBB.getFirstTerminator(), whereas
// the SEH_Nop is inserted at MBBI -- confirm the two positions are intended
// to differ.
1686void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1688 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1690 // Avoid the reload as it is GOT relative, and instead fall back to the
1691 // hardcoded value below. This allows a mismatch between the OS and
1692 // application without immediately terminating on the difference.
1693 [[fallthrough]];
1695 // We need to reset FP to its untagged state on return. Bit 60 is
1696 // currently used to show the presence of an extended frame.
1697
1698 // BIC x29, x29, #0x1000_0000_0000_0000
 // Emitted as ANDXri with immediate operand 0x10fe (presumably the encoded
 // form of the inverted mask -- confirm).
1699 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1700 AArch64::FP)
1701 .addUse(AArch64::FP)
1702 .addImm(0x10fe)
1704 if (NeedsWinCFI) {
1705 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1707 HasWinCFI = true;
1708 }
1709 break;
1710
1712 break;
1713 }
1714}
1715
// Emits the shadow call stack epilogue at MBBI: a pre-indexed load of LR
// through x18 with a decrement of 8 (see the instruction comment below).
// Adds an SEH_Nop when NeedsWinCFI is set and, when the function needs async
// DWARF unwind info, a CFI restore directive for x18.
1716void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1718 // Shadow call stack epilog: ldr x30, [x18, #-8]!
1719 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1720 .addReg(AArch64::X18, RegState::Define)
1721 .addReg(AArch64::LR, RegState::Define)
1722 .addReg(AArch64::X18)
1723 .addImm(-8)
1725
1726 if (NeedsWinCFI)
1727 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1729
1730 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1731 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1732 .buildRestore(AArch64::X18);
1733}
1734
// Emits a CFI restore directive at MBBI for callee-saved registers.
// SVE selects which group is handled: when true, only entries whose frame
// index has a scalable stack ID (and whose register needs CFI according to
// RegInfo.regNeedsCFI); when false, only entries in non-scalable slots.
// Does nothing when there are no callee saves.
1735void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1736 MachineBasicBlock::iterator MBBI, bool SVE) const {
1737 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1738 if (CSI.empty())
1739 return;
1740
1741 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1742
1743 for (const auto &Info : CSI) {
 // Skip entries that belong to the other group (scalable vs. non-scalable).
1744 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1745 continue;
1746
1747 MCRegister Reg = Info.getReg();
1748 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1749 continue;
1750
1751 CFIBuilder.buildRestore(Info.getReg());
1752 }
1753}
1754
// Final epilogue fix-ups, all inserted before the first terminator of MBB:
//  - the shadow call stack epilogue, when the function needs one;
//  - CFI restores for the callee-saved GPRs, when EmitCFI is set;
//  - a PAUTH_EPILOGUE pseudo, when the return address is signed but not
//    "everywhere" (that case is handled elsewhere, per the comment below);
//  - SEH_EpilogEnd when SEH opcodes were emitted, also marking MF as having
//    WinCFI.
// If WinCFI was needed but no SEH opcode was emitted, the SEH_EpilogStart
// recorded in SEHEpilogueStartI is erased again.
1755void AArch64EpilogueEmitter::finalizeEpilogue() const {
1756 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1757 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1759 }
1760 if (EmitCFI)
1761 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1762 if (AFI->shouldSignReturnAddress(MF)) {
1763 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1764 // are inserted by emitPacRetPlusLeafHardening().
1765 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1766 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1767 TII->get(AArch64::PAUTH_EPILOGUE))
1769 }
1770 // AArch64PointerAuth pass will insert SEH_PACSignLR
1772 }
1773 if (HasWinCFI) {
1774 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1776 if (!MF.hasWinCFI())
1777 MF.setHasWinCFI(true);
1778 }
1779 if (NeedsWinCFI) {
1780 assert(SEHEpilogueStartI != MBB.end());
 // No SEH opcode ended up in this epilogue: drop the speculative
 // SEH_EpilogStart marker.
1781 if (!HasWinCFI)
1782 MBB.erase(SEHEpilogueStartI);
1783 }
1784}
1785
1786} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the declaration of the AArch64PrologueEmitter and AArch64EpilogueEmitter classes,...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains constants used for implementing Dwarf debug support.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
if(PassOpts->AAPipeline)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static const unsigned FramePtr
AArch64EpilogueEmitter(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64FrameLowering &AFL)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64FrameLowering &AFL)
MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc, bool EmitCFI, MachineInstr::MIFlag FrameFlag=MachineInstr::FrameSetup, int CFAOffset=0) const
bool isVGInstruction(MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const
AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64FrameLowering &AFL)
void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, uint64_t LocalStackSize) const
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const
SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &)
const AArch64TargetLowering * getTargetLowering() const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this functions.
Helper class for creating CFI instructions and inserting them into MIR.
void buildDefCFAOffset(int64_t Offset, MCSymbol *Label=nullptr) const
void insertCFIInst(const MCCFIInstruction &CFIInst) const
void buildDefCFA(MCRegister Reg, int64_t Offset) const
A debug info location.
Definition DebugLoc.h:124
A set of physical registers with utility functions to track liveness when walking backward/forward th...
MachineInstrBundleIterator< MachineInstr > iterator
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
static MachineOperand CreateImm(int64_t Val)
const char * getSymbolName() const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:50
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
const unsigned StackProbeMaxUnprobedStack
Maximum allowed number of unprobed bytes above SP at an ABI boundary.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO, RTLIB::Libcall LC)
static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
static bool isFuncletReturnInstr(const MachineInstr &MI)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI, LivePhysRegs &LiveRegs)
Collect live registers from the end of MI's parent up to (including) MI in LiveRegs.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I)
@ Success
The lock was released successfully.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, unsigned LocalStackSize)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
DWARFExpression::Operation Op
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
static SVEPartitions partitionSVECS(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, StackOffset PPRCalleeSavesSize, StackOffset ZPRCalleeSavesSize, bool IsEpilogue)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I)
static int64_t upperBound(StackOffset Size)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
MachineBasicBlock::iterator End
struct llvm::SVEPartitions::@327166152017175235362202041204223104077330276266 ZPR
struct llvm::SVEPartitions::@327166152017175235362202041204223104077330276266 PPR
MachineBasicBlock::iterator Begin