LLVM 23.0.0git
AArch64InstrInfo.cpp
Go to the documentation of this file.
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
45#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/Module.h"
48#include "llvm/MC/MCAsmInfo.h"
49#include "llvm/MC/MCInst.h"
51#include "llvm/MC/MCInstrDesc.h"
56#include "llvm/Support/LEB128.h"
60#include <cassert>
61#include <cstdint>
62#include <iterator>
63#include <utility>
64
65using namespace llvm;
66
67#define GET_INSTRINFO_CTOR_DTOR
68#include "AArch64GenInstrInfo.inc"
69
70#define DEBUG_TYPE "AArch64InstrInfo"
71
72STATISTIC(NumCopyInstrs, "Number of COPY instructions expanded");
73STATISTIC(NumZCRegMoveInstrsGPR, "Number of zero-cycle GPR register move "
74 "instructions expanded from canonical COPY");
75STATISTIC(NumZCRegMoveInstrsFPR, "Number of zero-cycle FPR register move "
76 "instructions expanded from canonical COPY");
77STATISTIC(NumZCZeroingInstrsGPR, "Number of zero-cycle GPR zeroing "
78 "instructions expanded from canonical COPY");
79// NumZCZeroingInstrsFPR is counted at AArch64AsmPrinter
80
82 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
83 cl::desc("Restrict range of CB instructions (DEBUG)"));
84
86 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
87 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
88
90 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
91 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
92
94 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
95 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
96
98 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
99 cl::desc("Restrict range of B instructions (DEBUG)"));
100
102 "aarch64-search-limit", cl::Hidden, cl::init(2048),
103 cl::desc("Restrict range of instructions to search for the "
104 "machine-combiner gather pattern optimization"));
105
107 : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN,
108 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
109 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
110
111/// Return the maximum number of bytes of code the specified instruction may be
112/// after LFI rewriting. If the instruction is not rewritten, std::nullopt is
113/// returned (use default sizing).
114///
115/// NOTE: the size estimates here must be kept in sync with the rewrites in
116/// AArch64MCLFIRewriter.cpp. Sizes may be overestimates of the rewritten
117/// instruction sequences.
118static std::optional<unsigned> getLFIInstSizeInBytes(const MachineInstr &MI) {
119 switch (MI.getOpcode()) {
120 case AArch64::SVC:
121 // SVC expands to 4 instructions.
122 return 16;
123 case AArch64::BR:
124 case AArch64::BLR:
125 // Indirect branches/calls expand to 2 instructions (guard + br/blr).
126 return 8;
127 case AArch64::RET:
128 // RET through LR is not rewritten, but RET through another register
129 // expands to 2 instructions (guard + ret).
130 if (MI.getOperand(0).getReg() != AArch64::LR)
131 return 8;
132 return 4;
133 case AArch64::SYSxt:
134 // VA-based DC/IC ops (op1=3, Cn=7, op2=1) expand to 2 instructions.
135 if (MI.getOperand(0).getImm() == 3 && MI.getOperand(1).getImm() == 7 &&
136 MI.getOperand(3).getImm() == 1)
137 return 8;
138 return std::nullopt;
139 default:
140 break;
141 }
142
143 // Detect instructions that explicitly define SP or LR.
144 bool ModifiesLR = false;
145 bool ModifiesSP = false;
146 for (const MachineOperand &MO : MI.defs()) {
147 if (!MO.isReg())
148 continue;
149 if (MO.getReg() == AArch64::LR)
150 ModifiesLR = true;
151 else if (MO.getReg() == AArch64::SP)
152 ModifiesSP = true;
153 }
154
155 // Memory accesses expand to a base-register guard plus the rewritten access
156 // (8 bytes), with an extra base-register update for pre/post-index forms (12
157 // bytes total). If the access also defines LR, an LR mask is appended (+4
158 // bytes). Depending on additional optimizations that the rewriter performs,
159 // this may be an overestimate.
160 if (MI.mayLoadOrStore()) {
161 unsigned Size = isLFIPrePostMemAccess(MI.getOpcode()) ? 12 : 8;
162 if (ModifiesLR)
163 Size += 4;
164 return Size;
165 }
166
167 // Non memory operations that modify LR or SP expand to 2 instructions.
168 if (ModifiesSP || ModifiesLR)
169 return 8;
170
171 // Default case: instructions that don't cause expansion.
172 // - TP accesses in LFI are a single load/store, so no expansion.
173 // - All remaining instructions are not rewritten.
174 return std::nullopt;
175}
176
177/// GetInstSize - Return the number of bytes of code the specified
178/// instruction may be. This returns the maximum number of bytes.
180 const MachineBasicBlock &MBB = *MI.getParent();
181 const MachineFunction *MF = MBB.getParent();
182 const Function &F = MF->getFunction();
183 const MCAsmInfo &MAI = MF->getTarget().getMCAsmInfo();
184
185 {
186 auto Op = MI.getOpcode();
187 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
188 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
189 }
190
191 // Meta-instructions emit no code.
192 if (MI.isMetaInstruction())
193 return 0;
194
195 // FIXME: We currently only handle pseudoinstructions that don't get expanded
196 // before the assembly printer.
197 unsigned NumBytes = 0;
198 const MCInstrDesc &Desc = MI.getDesc();
199
200 // LFI rewriter expansions that supersede normal sizing.
201 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
202 if (STI.isLFI())
203 if (auto Size = getLFIInstSizeInBytes(MI))
204 return *Size;
205
206 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
207 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
208
209 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
210 if (!MFI->shouldSignReturnAddress(*MF))
211 return NumBytes;
212
213 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
214 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
215 return NumBytes;
216 }
217
218 // Size should be preferably set in
219 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
220 // Specific cases handle instructions of variable sizes
221 switch (Desc.getOpcode()) {
222 default:
223 if (Desc.getSize())
224 return Desc.getSize();
225
226 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
227 // with fixed constant size but not specified in .td file) is a normal
228 // 4-byte insn.
229 NumBytes = 4;
230 break;
231 case TargetOpcode::STACKMAP:
232 // The upper bound for a stackmap intrinsic is the full length of its shadow
233 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
234 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
235 break;
236 case TargetOpcode::PATCHPOINT:
237 // The size of the patchpoint intrinsic is the number of bytes requested
238 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
239 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
240 break;
241 case TargetOpcode::STATEPOINT:
242 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
243 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
244 // No patch bytes means a normal call inst is emitted
245 if (NumBytes == 0)
246 NumBytes = 4;
247 break;
248 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
249 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
250 // instructions are expanded to the specified number of NOPs. Otherwise,
251 // they are expanded to 36-byte XRay sleds.
252 NumBytes =
253 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
254 break;
255 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
256 case TargetOpcode::PATCHABLE_TAIL_CALL:
257 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
258 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
259 NumBytes = 36;
260 break;
261 case TargetOpcode::PATCHABLE_EVENT_CALL:
262 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
263 NumBytes = 24;
264 break;
265
266 case AArch64::SPACE:
267 NumBytes = MI.getOperand(1).getImm();
268 break;
269 case AArch64::MOVaddr:
270 case AArch64::MOVaddrJT:
271 case AArch64::MOVaddrCP:
272 case AArch64::MOVaddrBA:
273 case AArch64::MOVaddrTLS:
274 case AArch64::MOVaddrEXT: {
275 // Use the same logic as the pseudo expansion to count instructions.
278 MI.getOperand(1).getTargetFlags(),
279 Subtarget.isTargetMachO(), Insn);
280 NumBytes = Insn.size() * 4;
281 break;
282 }
283
284 case AArch64::MOVi32imm:
285 case AArch64::MOVi64imm: {
286 // Use the same logic as the pseudo expansion to count instructions.
287 unsigned BitSize = Desc.getOpcode() == AArch64::MOVi32imm ? 32 : 64;
289 AArch64_IMM::expandMOVImm(MI.getOperand(1).getImm(), BitSize, Insn);
290 NumBytes = Insn.size() * 4;
291 break;
292 }
293
294 case TargetOpcode::BUNDLE:
295 NumBytes = getInstBundleSize(MI);
296 break;
297 }
298
299 return NumBytes;
300}
301
304 // Block ends with fall-through condbranch.
305 switch (LastInst->getOpcode()) {
306 default:
307 llvm_unreachable("Unknown branch instruction?");
308 case AArch64::Bcc:
309 Target = LastInst->getOperand(1).getMBB();
310 Cond.push_back(LastInst->getOperand(0));
311 break;
312 case AArch64::CBZW:
313 case AArch64::CBZX:
314 case AArch64::CBNZW:
315 case AArch64::CBNZX:
316 Target = LastInst->getOperand(1).getMBB();
317 Cond.push_back(MachineOperand::CreateImm(-1));
318 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
319 Cond.push_back(LastInst->getOperand(0));
320 break;
321 case AArch64::TBZW:
322 case AArch64::TBZX:
323 case AArch64::TBNZW:
324 case AArch64::TBNZX:
325 Target = LastInst->getOperand(2).getMBB();
326 Cond.push_back(MachineOperand::CreateImm(-1));
327 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
328 Cond.push_back(LastInst->getOperand(0));
329 Cond.push_back(LastInst->getOperand(1));
330 break;
331 case AArch64::CBWPri:
332 case AArch64::CBXPri:
333 case AArch64::CBWPrr:
334 case AArch64::CBXPrr:
335 Target = LastInst->getOperand(3).getMBB();
336 Cond.push_back(MachineOperand::CreateImm(-1));
337 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
338 Cond.push_back(LastInst->getOperand(0));
339 Cond.push_back(LastInst->getOperand(1));
340 Cond.push_back(LastInst->getOperand(2));
341 break;
342 case AArch64::CBBAssertExt:
343 case AArch64::CBHAssertExt:
344 Target = LastInst->getOperand(3).getMBB();
345 Cond.push_back(MachineOperand::CreateImm(-1)); // -1
346 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); // Opc
347 Cond.push_back(LastInst->getOperand(0)); // Cond
348 Cond.push_back(LastInst->getOperand(1)); // Op0
349 Cond.push_back(LastInst->getOperand(2)); // Op1
350 Cond.push_back(LastInst->getOperand(4)); // Ext0
351 Cond.push_back(LastInst->getOperand(5)); // Ext1
352 break;
353 }
354}
355
356static unsigned getBranchDisplacementBits(unsigned Opc) {
357 switch (Opc) {
358 default:
359 llvm_unreachable("unexpected opcode!");
360 case AArch64::B:
361 return BDisplacementBits;
362 case AArch64::TBNZW:
363 case AArch64::TBZW:
364 case AArch64::TBNZX:
365 case AArch64::TBZX:
366 return TBZDisplacementBits;
367 case AArch64::CBNZW:
368 case AArch64::CBZW:
369 case AArch64::CBNZX:
370 case AArch64::CBZX:
371 return CBZDisplacementBits;
372 case AArch64::Bcc:
373 return BCCDisplacementBits;
374 case AArch64::CBWPri:
375 case AArch64::CBXPri:
376 case AArch64::CBBAssertExt:
377 case AArch64::CBHAssertExt:
378 case AArch64::CBWPrr:
379 case AArch64::CBXPrr:
380 return CBDisplacementBits;
381 }
382}
383
385 int64_t BrOffset) const {
386 unsigned Bits = getBranchDisplacementBits(BranchOp);
387 assert(Bits >= 3 && "max branch displacement must be enough to jump"
388 "over conditional branch expansion");
389 return isIntN(Bits, BrOffset / 4);
390}
391
394 switch (MI.getOpcode()) {
395 default:
396 llvm_unreachable("unexpected opcode!");
397 case AArch64::B:
398 return MI.getOperand(0).getMBB();
399 case AArch64::TBZW:
400 case AArch64::TBNZW:
401 case AArch64::TBZX:
402 case AArch64::TBNZX:
403 return MI.getOperand(2).getMBB();
404 case AArch64::CBZW:
405 case AArch64::CBNZW:
406 case AArch64::CBZX:
407 case AArch64::CBNZX:
408 case AArch64::Bcc:
409 return MI.getOperand(1).getMBB();
410 case AArch64::CBWPri:
411 case AArch64::CBXPri:
412 case AArch64::CBBAssertExt:
413 case AArch64::CBHAssertExt:
414 case AArch64::CBWPrr:
415 case AArch64::CBXPrr:
416 return MI.getOperand(3).getMBB();
417 }
418}
419
421 MachineBasicBlock &NewDestBB,
422 MachineBasicBlock &RestoreBB,
423 const DebugLoc &DL,
424 int64_t BrOffset,
425 RegScavenger *RS) const {
426 assert(RS && "RegScavenger required for long branching");
427 assert(MBB.empty() &&
428 "new block should be inserted for expanding unconditional branch");
429 assert(MBB.pred_size() == 1);
430 assert(RestoreBB.empty() &&
431 "restore block should be inserted for restoring clobbered registers");
432
433 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
434 // Offsets outside of the signed 33-bit range are not supported for ADRP +
435 // ADD.
436 if (!isInt<33>(BrOffset))
438 "Branch offsets outside of the signed 33-bit range not supported");
439
440 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
441 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
442 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
443 .addReg(Reg)
444 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
445 .addImm(0);
446 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
447 };
448
449 RS->enterBasicBlockEnd(MBB);
450 // If X16 is unused, we can rely on the linker to insert a range extension
451 // thunk if NewDestBB is out of range of a single B instruction.
452 constexpr Register Reg = AArch64::X16;
453 if (!RS->isRegUsed(Reg)) {
454 insertUnconditionalBranch(MBB, &NewDestBB, DL);
455 RS->setRegUsed(Reg);
456 return;
457 }
458
459 // In a cold block without BTI, insert the indirect branch if a register is
460 // free. Skip this if BTI is enabled to avoid inserting a BTI at the target,
461 // prioritizing a dynamic cost in cold code over a static cost in hot code.
462 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
463 bool HasBTI = AFI && AFI->branchTargetEnforcement();
464 if (MBB.getSectionID() == MBBSectionID::ColdSectionID && !HasBTI) {
465 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
466 if (Scavenged != AArch64::NoRegister) {
467 buildIndirectBranch(Scavenged, NewDestBB);
468 RS->setRegUsed(Scavenged);
469 return;
470 }
471 }
472
473 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
474 // with red zones.
475 if (!AFI || AFI->hasRedZone().value_or(true))
477 "Unable to insert indirect branch inside function that has red zone");
478
479 // Otherwise, spill X16 and defer range extension to the linker.
480 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
481 .addReg(AArch64::SP, RegState::Define)
482 .addReg(Reg)
483 .addReg(AArch64::SP)
484 .addImm(-16);
485
486 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
487
488 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
489 .addReg(AArch64::SP, RegState::Define)
491 .addReg(AArch64::SP)
492 .addImm(16);
493}
494
495// Branch analysis.
498 MachineBasicBlock *&FBB,
500 bool AllowModify) const {
501 // If the block has no terminators, it just falls into the block after it.
502 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
503 if (I == MBB.end())
504 return false;
505
506 // Skip over SpeculationBarrierEndBB terminators
507 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
508 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
509 --I;
510 }
511
512 if (!isUnpredicatedTerminator(*I))
513 return false;
514
515 // Get the last instruction in the block.
516 MachineInstr *LastInst = &*I;
517
518 // If there is only one terminator instruction, process it.
519 unsigned LastOpc = LastInst->getOpcode();
520 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
521 if (isUncondBranchOpcode(LastOpc)) {
522 TBB = LastInst->getOperand(0).getMBB();
523 return false;
524 }
525 if (isCondBranchOpcode(LastOpc)) {
526 // Block ends with fall-through condbranch.
527 parseCondBranch(LastInst, TBB, Cond);
528 return false;
529 }
530 return true; // Can't handle indirect branch.
531 }
532
533 // Get the instruction before it if it is a terminator.
534 MachineInstr *SecondLastInst = &*I;
535 unsigned SecondLastOpc = SecondLastInst->getOpcode();
536
537 // If AllowModify is true and the block ends with two or more unconditional
538 // branches, delete all but the first unconditional branch.
539 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
540 while (isUncondBranchOpcode(SecondLastOpc)) {
541 LastInst->eraseFromParent();
542 LastInst = SecondLastInst;
543 LastOpc = LastInst->getOpcode();
544 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
545 // Return now the only terminator is an unconditional branch.
546 TBB = LastInst->getOperand(0).getMBB();
547 return false;
548 }
549 SecondLastInst = &*I;
550 SecondLastOpc = SecondLastInst->getOpcode();
551 }
552 }
553
554 // If we're allowed to modify and the block ends in an unconditional branch
555 // which could simply fallthrough, remove the branch. (Note: This case only
556 // matters when we can't understand the whole sequence, otherwise it's also
557 // handled by BranchFolding.cpp.)
558 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
559 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
560 LastInst->eraseFromParent();
561 LastInst = SecondLastInst;
562 LastOpc = LastInst->getOpcode();
563 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
564 assert(!isUncondBranchOpcode(LastOpc) &&
565 "unreachable unconditional branches removed above");
566
567 if (isCondBranchOpcode(LastOpc)) {
568 // Block ends with fall-through condbranch.
569 parseCondBranch(LastInst, TBB, Cond);
570 return false;
571 }
572 return true; // Can't handle indirect branch.
573 }
574 SecondLastInst = &*I;
575 SecondLastOpc = SecondLastInst->getOpcode();
576 }
577
578 // If there are three terminators, we don't know what sort of block this is.
579 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
580 return true;
581
582 // If the block ends with a B and a Bcc, handle it.
583 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
584 parseCondBranch(SecondLastInst, TBB, Cond);
585 FBB = LastInst->getOperand(0).getMBB();
586 return false;
587 }
588
589 // If the block ends with two unconditional branches, handle it. The second
590 // one is not executed, so remove it.
591 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
592 TBB = SecondLastInst->getOperand(0).getMBB();
593 I = LastInst;
594 if (AllowModify)
595 I->eraseFromParent();
596 return false;
597 }
598
599 // ...likewise if it ends with an indirect branch followed by an unconditional
600 // branch.
601 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
602 I = LastInst;
603 if (AllowModify)
604 I->eraseFromParent();
605 return true;
606 }
607
608 // Otherwise, can't handle this.
609 return true;
610}
611
613 MachineBranchPredicate &MBP,
614 bool AllowModify) const {
615 // Use analyzeBranch to validate the branch pattern.
616 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
618 if (analyzeBranch(MBB, TBB, FBB, Cond, AllowModify))
619 return true;
620
621 // analyzeBranch returns success with empty Cond for unconditional branches.
622 if (Cond.empty())
623 return true;
624
625 MBP.TrueDest = TBB;
626 assert(MBP.TrueDest && "expected!");
627 MBP.FalseDest = FBB ? FBB : MBB.getNextNode();
628
629 MBP.ConditionDef = nullptr;
630 MBP.SingleUseCondition = false;
631
632 // Find the conditional branch. After analyzeBranch succeeds with non-empty
633 // Cond, there's exactly one conditional branch - either last (fallthrough)
634 // or second-to-last (followed by unconditional B).
635 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
636 if (I == MBB.end())
637 return true;
638
639 if (isUncondBranchOpcode(I->getOpcode())) {
640 if (I == MBB.begin())
641 return true;
642 --I;
643 }
644
645 MachineInstr *CondBranch = &*I;
646 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
647
648 switch (CondBranch->getOpcode()) {
649 default:
650 return true;
651
652 case AArch64::Bcc:
653 // Bcc takes the NZCV flag as the operand to branch on, walk up the
654 // instruction stream to find the last instruction to define NZCV.
656 if (MI.modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
657 MBP.ConditionDef = &MI;
658 break;
659 }
660 }
661 return false;
662
663 case AArch64::CBZW:
664 case AArch64::CBZX:
665 case AArch64::CBNZW:
666 case AArch64::CBNZX: {
667 MBP.LHS = CondBranch->getOperand(0);
668 MBP.RHS = MachineOperand::CreateImm(0);
669 unsigned Opc = CondBranch->getOpcode();
670 MBP.Predicate = (Opc == AArch64::CBNZX || Opc == AArch64::CBNZW)
671 ? MachineBranchPredicate::PRED_NE
672 : MachineBranchPredicate::PRED_EQ;
673 Register CondReg = MBP.LHS.getReg();
674 if (CondReg.isVirtual())
675 MBP.ConditionDef = MRI.getVRegDef(CondReg);
676 return false;
677 }
678
679 case AArch64::TBZW:
680 case AArch64::TBZX:
681 case AArch64::TBNZW:
682 case AArch64::TBNZX: {
683 Register CondReg = CondBranch->getOperand(0).getReg();
684 if (CondReg.isVirtual())
685 MBP.ConditionDef = MRI.getVRegDef(CondReg);
686 return false;
687 }
688 }
689}
690
693 if (Cond[0].getImm() != -1) {
694 // Regular Bcc
695 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
697 } else {
698 // Folded compare-and-branch
699 switch (Cond[1].getImm()) {
700 default:
701 llvm_unreachable("Unknown conditional branch!");
702 case AArch64::CBZW:
703 Cond[1].setImm(AArch64::CBNZW);
704 break;
705 case AArch64::CBNZW:
706 Cond[1].setImm(AArch64::CBZW);
707 break;
708 case AArch64::CBZX:
709 Cond[1].setImm(AArch64::CBNZX);
710 break;
711 case AArch64::CBNZX:
712 Cond[1].setImm(AArch64::CBZX);
713 break;
714 case AArch64::TBZW:
715 Cond[1].setImm(AArch64::TBNZW);
716 break;
717 case AArch64::TBNZW:
718 Cond[1].setImm(AArch64::TBZW);
719 break;
720 case AArch64::TBZX:
721 Cond[1].setImm(AArch64::TBNZX);
722 break;
723 case AArch64::TBNZX:
724 Cond[1].setImm(AArch64::TBZX);
725 break;
726
727 // Cond is { -1, Opcode, CC, Op0, Op1, ... }
728 case AArch64::CBWPri:
729 case AArch64::CBXPri:
730 case AArch64::CBBAssertExt:
731 case AArch64::CBHAssertExt:
732 case AArch64::CBWPrr:
733 case AArch64::CBXPrr: {
734 // Pseudos using standard 4bit Arm condition codes
736 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
738 }
739 }
740 }
741
742 return false;
743}
744
746 int *BytesRemoved) const {
747 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
748 if (I == MBB.end())
749 return 0;
750
751 if (!isUncondBranchOpcode(I->getOpcode()) &&
752 !isCondBranchOpcode(I->getOpcode()))
753 return 0;
754
755 // Remove the branch.
756 I->eraseFromParent();
757
758 I = MBB.end();
759
760 if (I == MBB.begin()) {
761 if (BytesRemoved)
762 *BytesRemoved = 4;
763 return 1;
764 }
765 --I;
766 if (!isCondBranchOpcode(I->getOpcode())) {
767 if (BytesRemoved)
768 *BytesRemoved = 4;
769 return 1;
770 }
771
772 // Remove the branch.
773 I->eraseFromParent();
774 if (BytesRemoved)
775 *BytesRemoved = 8;
776
777 return 2;
778}
779
780void AArch64InstrInfo::instantiateCondBranch(
783 if (Cond[0].getImm() != -1) {
784 // Regular Bcc
785 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
786 } else {
787 // Folded compare-and-branch
788 // Note that we use addOperand instead of addReg to keep the flags.
789
790 // cbz, cbnz
791 const MachineInstrBuilder MIB =
792 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
793
794 // tbz/tbnz
795 if (Cond.size() > 3)
796 MIB.add(Cond[3]);
797
798 // cb
799 if (Cond.size() > 4)
800 MIB.add(Cond[4]);
801
802 MIB.addMBB(TBB);
803
804 // cb[b,h]
805 if (Cond.size() > 5) {
806 MIB.addImm(Cond[5].getImm());
807 MIB.addImm(Cond[6].getImm());
808 }
809 }
810}
811
814 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
815 // Shouldn't be a fall through.
816 assert(TBB && "insertBranch must not be told to insert a fallthrough");
817
818 if (!FBB) {
819 if (Cond.empty()) // Unconditional branch?
820 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
821 else
822 instantiateCondBranch(MBB, DL, TBB, Cond);
823
824 if (BytesAdded)
825 *BytesAdded = 4;
826
827 return 1;
828 }
829
830 // Two-way conditional branch.
831 instantiateCondBranch(MBB, DL, TBB, Cond);
832 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
833
834 if (BytesAdded)
835 *BytesAdded = 8;
836
837 return 2;
838}
839
841 const TargetInstrInfo &TII) {
842 for (MachineInstr &MI : MBB->terminators()) {
843 unsigned Opc = MI.getOpcode();
844 switch (Opc) {
845 case AArch64::CBZW:
846 case AArch64::CBZX:
847 case AArch64::TBZW:
848 case AArch64::TBZX:
849 // CBZ/TBZ with WZR/XZR -> unconditional B
850 if (MI.getOperand(0).getReg() == AArch64::WZR ||
851 MI.getOperand(0).getReg() == AArch64::XZR) {
852 DEBUG_WITH_TYPE("optimizeTerminators",
853 dbgs() << "Removing always taken branch: " << MI);
854 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
855 SmallVector<MachineBasicBlock *> Succs(MBB->successors());
856 for (auto *S : Succs)
857 if (S != Target)
858 MBB->removeSuccessor(S);
859 DebugLoc DL = MI.getDebugLoc();
860 while (MBB->rbegin() != &MI)
861 MBB->rbegin()->eraseFromParent();
862 MI.eraseFromParent();
863 BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
864 return true;
865 }
866 break;
867 case AArch64::CBNZW:
868 case AArch64::CBNZX:
869 case AArch64::TBNZW:
870 case AArch64::TBNZX:
871 // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
872 if (MI.getOperand(0).getReg() == AArch64::WZR ||
873 MI.getOperand(0).getReg() == AArch64::XZR) {
874 DEBUG_WITH_TYPE("optimizeTerminators",
875 dbgs() << "Removing never taken branch: " << MI);
876 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
877 MI.getParent()->removeSuccessor(Target);
878 MI.eraseFromParent();
879 return true;
880 }
881 break;
882 }
883 }
884 return false;
885}
886
887// Find the original register that VReg is copied from.
888static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
889 while (Register::isVirtualRegister(VReg)) {
890 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
891 if (!DefMI->isFullCopy())
892 return VReg;
893 VReg = DefMI->getOperand(1).getReg();
894 }
895 return VReg;
896}
897
898// Determine if VReg is defined by an instruction that can be folded into a
899// csel instruction. If so, return the folded opcode, and the replacement
900// register.
901static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
902 unsigned *NewReg = nullptr) {
903 VReg = removeCopies(MRI, VReg);
905 return 0;
906
907 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
908 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
909 unsigned Opc = 0;
910 unsigned SrcReg = 0;
911 switch (DefMI->getOpcode()) {
912 case AArch64::SUBREG_TO_REG:
913 // Check for the following way to define a 64-bit immediate:
914 // %0:gpr32 = MOVi32imm 1
915 // %1:gpr64 = SUBREG_TO_REG %0:gpr32, %subreg.sub_32
916 if (!DefMI->getOperand(1).isReg())
917 return 0;
918 if (!DefMI->getOperand(2).isImm() ||
919 DefMI->getOperand(2).getImm() != AArch64::sub_32)
920 return 0;
921 DefMI = MRI.getVRegDef(DefMI->getOperand(1).getReg());
922 if (DefMI->getOpcode() != AArch64::MOVi32imm)
923 return 0;
924 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
925 return 0;
926 assert(Is64Bit);
927 SrcReg = AArch64::XZR;
928 Opc = AArch64::CSINCXr;
929 break;
930
931 case AArch64::MOVi32imm:
932 case AArch64::MOVi64imm:
933 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
934 return 0;
935 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
936 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
937 break;
938
939 case AArch64::ADDSXri:
940 case AArch64::ADDSWri:
941 // if NZCV is used, do not fold.
942 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
943 true) == -1)
944 return 0;
945 // fall-through to ADDXri and ADDWri.
946 [[fallthrough]];
947 case AArch64::ADDXri:
948 case AArch64::ADDWri:
949 // add x, 1 -> csinc.
950 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
951 DefMI->getOperand(3).getImm() != 0)
952 return 0;
953 SrcReg = DefMI->getOperand(1).getReg();
954 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
955 break;
956
957 case AArch64::ORNXrr:
958 case AArch64::ORNWrr: {
959 // not x -> csinv, represented as orn dst, xzr, src.
960 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
961 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
962 return 0;
963 SrcReg = DefMI->getOperand(2).getReg();
964 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
965 break;
966 }
967
968 case AArch64::SUBSXrr:
969 case AArch64::SUBSWrr:
970 // if NZCV is used, do not fold.
971 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
972 true) == -1)
973 return 0;
974 // fall-through to SUBXrr and SUBWrr.
975 [[fallthrough]];
976 case AArch64::SUBXrr:
977 case AArch64::SUBWrr: {
978 // neg x -> csneg, represented as sub dst, xzr, src.
979 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
980 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
981 return 0;
982 SrcReg = DefMI->getOperand(2).getReg();
983 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
984 break;
985 }
986 default:
987 return 0;
988 }
989 assert(Opc && SrcReg && "Missing parameters");
990
991 if (NewReg)
992 *NewReg = SrcReg;
993 return Opc;
994}
995
// NOTE(review): the line(s) carrying this function's return type, name and
// leading parameters are missing from this listing (the embedded numbering
// jumps from 995 to 998). The body uses `MBB` and `Cond`, which are presumably
// declared there.
//
// Reports whether a select between TrueReg and FalseReg into DstReg can be
// emitted, and fills in CondCycles / TrueCycles / FalseCycles with the cycle
// estimates for the condition and the two inputs. Returns false when the
// register classes are incompatible or are neither 32/64-bit GPR nor 32/64-bit
// scalar FPR classes.
998 Register DstReg, Register TrueReg,
999 Register FalseReg, int &CondCycles,
1000 int &TrueCycles,
1001 int &FalseCycles) const {
1002 // Check register classes.
1003 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1004 const TargetRegisterClass *RC =
1005 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1006 if (!RC)
1007 return false;
1008
1009 // Also need to check the dest regclass, in case we're trying to optimize
1010 // something like:
1011 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
1012 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
1013 return false;
1014
1015 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
// A Cond of size 1 is a plain b.cc; any other size adds one cycle.
1016 unsigned ExtraCondLat = Cond.size() != 1;
1017
1018 // GPRs are handled by csel.
1019 // FIXME: Fold in x+1, -x, and ~x when applicable.
1020 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
1021 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
1022 // Single-cycle csel, csinc, csinv, and csneg.
1023 CondCycles = 1 + ExtraCondLat;
1024 TrueCycles = FalseCycles = 1;
// An input that canFoldIntoCSel() accepts is costed at zero cycles; at most
// one of the two inputs gets this discount.
1025 if (canFoldIntoCSel(MRI, TrueReg))
1026 TrueCycles = 0;
1027 else if (canFoldIntoCSel(MRI, FalseReg))
1028 FalseCycles = 0;
1029 return true;
1030 }
1031
1032 // Scalar floating point is handled by fcsel.
1033 // FIXME: Form fabs, fmin, and fmax when applicable.
1034 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
1035 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
1036 CondCycles = 5 + ExtraCondLat;
1037 TrueCycles = FalseCycles = 2;
1038 return true;
1039 }
1040
1041 // Can't do vectors.
1042 return false;
1043}
1044
// NOTE(review): this listing is missing the lines that carry the function's
// name and leading parameters, and several interior lines (the embedded
// numbering skips 1048, 1053, 1119, 1124, 1179, 1183, 1219, 1227, 1229 and
// 1276). The declarations of `CC` and `MBBI`, the arguments of two .addImm(
// calls and the condition guarding the extended-subs path are therefore not
// visible here.
//
// Emits, at insertion point I in MBB, whatever flag-setting compare the branch
// condition in Cond requires (none for a plain b.cc), followed by a
// csel/fcsel (or a folded csinc/csinv/csneg) that writes DstReg from TrueReg
// and FalseReg.
1047 const DebugLoc &DL, Register DstReg,
1049 Register TrueReg, Register FalseReg) const {
1050 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1051
1052 // Parse the condition code, see parseCondBranch() above.
// The number of entries in Cond identifies the kind of branch it describes.
1054 switch (Cond.size()) {
1055 default:
1056 llvm_unreachable("Unknown condition opcode in Cond");
1057 case 1: // b.cc
1058 CC = AArch64CC::CondCode(Cond[0].getImm());
1059 break;
1060 case 3: { // cbz/cbnz
1061 // We must insert a compare against 0.
1062 bool Is64Bit;
1063 switch (Cond[1].getImm()) {
1064 default:
1065 llvm_unreachable("Unknown branch opcode in Cond");
1066 case AArch64::CBZW:
1067 Is64Bit = false;
1068 CC = AArch64CC::EQ;
1069 break;
1070 case AArch64::CBZX:
1071 Is64Bit = true;
1072 CC = AArch64CC::EQ;
1073 break;
1074 case AArch64::CBNZW:
1075 Is64Bit = false;
1076 CC = AArch64CC::NE;
1077 break;
1078 case AArch64::CBNZX:
1079 Is64Bit = true;
1080 CC = AArch64CC::NE;
1081 break;
1082 }
1083 Register SrcReg = Cond[2].getReg();
1084 if (Is64Bit) {
1085 // cmp reg, #0 is actually subs xzr, reg, #0.
1086 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
1087 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
1088 .addReg(SrcReg)
1089 .addImm(0)
1090 .addImm(0);
1091 } else {
1092 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
1093 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
1094 .addReg(SrcReg)
1095 .addImm(0)
1096 .addImm(0);
1097 }
1098 break;
1099 }
1100 case 4: { // tbz/tbnz
1101 // We must insert a tst instruction.
1102 switch (Cond[1].getImm()) {
1103 default:
1104 llvm_unreachable("Unknown branch opcode in Cond");
1105 case AArch64::TBZW:
1106 case AArch64::TBZX:
1107 CC = AArch64CC::EQ;
1108 break;
1109 case AArch64::TBNZW:
1110 case AArch64::TBNZX:
1111 CC = AArch64CC::NE;
1112 break;
1113 }
1114 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
1115 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
1116 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
1117 .addReg(Cond[2].getReg())
1118 .addImm(
1120 else
1121 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
1122 .addReg(Cond[2].getReg())
1123 .addImm(
1125 break;
1126 }
1127 case 5: { // cb
1128 // We must insert a cmp, that is a subs
1129 // 0 1 2 3 4
1130 // Cond is { -1, Opcode, CC, Op0, Op1 }
1131
1132 unsigned SubsOpc, SubsDestReg;
1133 bool IsImm = false;
1134 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
// Pick the subs form (32/64-bit, immediate/register) matching the cb opcode.
1135 switch (Cond[1].getImm()) {
1136 default:
1137 llvm_unreachable("Unknown branch opcode in Cond");
1138 case AArch64::CBWPri:
1139 SubsOpc = AArch64::SUBSWri;
1140 SubsDestReg = AArch64::WZR;
1141 IsImm = true;
1142 break;
1143 case AArch64::CBXPri:
1144 SubsOpc = AArch64::SUBSXri;
1145 SubsDestReg = AArch64::XZR;
1146 IsImm = true;
1147 break;
1148 case AArch64::CBWPrr:
1149 SubsOpc = AArch64::SUBSWrr;
1150 SubsDestReg = AArch64::WZR;
1151 IsImm = false;
1152 break;
1153 case AArch64::CBXPrr:
1154 SubsOpc = AArch64::SUBSXrr;
1155 SubsDestReg = AArch64::XZR;
1156 IsImm = false;
1157 break;
1158 }
1159
1160 if (IsImm)
1161 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1162 .addReg(Cond[3].getReg())
1163 .addImm(Cond[4].getImm())
1164 .addImm(0);
1165 else
1166 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1167 .addReg(Cond[3].getReg())
1168 .addReg(Cond[4].getReg());
1169 } break;
1170 case 7: { // cb[b,h]
1171 // We must insert a cmp, that is a subs, but also zero- or sign-extensions
1172 // that have been folded. For the first operand we codegen an explicit
1173 // extension, for the second operand we fold the extension into cmp.
1174 // 0 1 2 3 4 5 6
1175 // Cond is { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }
1176
1177 // We need a new register for the now explicitly extended register
1178 Register Reg = Cond[4].getReg();
1180 unsigned ExtOpc;
1181 unsigned ExtBits;
1182 AArch64_AM::ShiftExtendType ExtendType =
1184 switch (ExtendType) {
1185 default:
1186 llvm_unreachable("Unknown shift-extend for CB instruction");
1187 case AArch64_AM::SXTB:
1188 assert(
1189 Cond[1].getImm() == AArch64::CBBAssertExt &&
1190 "Unexpected compare-and-branch instruction for SXTB shift-extend");
1191 ExtOpc = AArch64::SBFMWri;
1192 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1193 break;
1194 case AArch64_AM::SXTH:
1195 assert(
1196 Cond[1].getImm() == AArch64::CBHAssertExt &&
1197 "Unexpected compare-and-branch instruction for SXTH shift-extend");
1198 ExtOpc = AArch64::SBFMWri;
1199 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1200 break;
1201 case AArch64_AM::UXTB:
1202 assert(
1203 Cond[1].getImm() == AArch64::CBBAssertExt &&
1204 "Unexpected compare-and-branch instruction for UXTB shift-extend");
1205 ExtOpc = AArch64::ANDWri;
1206 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1207 break;
1208 case AArch64_AM::UXTH:
1209 assert(
1210 Cond[1].getImm() == AArch64::CBHAssertExt &&
1211 "Unexpected compare-and-branch instruction for UXTH shift-extend");
1212 ExtOpc = AArch64::ANDWri;
1213 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1214 break;
1215 }
1216
1217 // Build the explicit extension of the first operand
1218 Reg = MRI.createVirtualRegister(&AArch64::GPR32spRegClass);
1220 BuildMI(MBB, I, DL, get(ExtOpc), Reg).addReg(Cond[4].getReg());
// Every opcode other than ANDWri takes an extra leading immediate of 0 before
// ExtBits.
1221 if (ExtOpc != AArch64::ANDWri)
1222 MBBI.addImm(0);
1223 MBBI.addImm(ExtBits);
1224 }
1225
1226 // Now, subs with an extended second operand
1228 AArch64_AM::ShiftExtendType ExtendType =
1230 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1231 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1232 BuildMI(MBB, I, DL, get(AArch64::SUBSWrx), AArch64::WZR)
1233 .addReg(Cond[3].getReg())
1234 .addReg(Reg)
1235 .addImm(AArch64_AM::getArithExtendImm(ExtendType, 0));
1236 } // If no extension is needed, just a regular subs
1237 else {
1238 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1239 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1240 BuildMI(MBB, I, DL, get(AArch64::SUBSWrr), AArch64::WZR)
1241 .addReg(Cond[3].getReg())
1242 .addReg(Reg);
1243 }
1244
1245 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1246 } break;
1247 }
1248
// Choose the select opcode from the first register class DstReg can be
// constrained to, trying GPR64, GPR32, FPR64 and FPR32 in that order. Only the
// GPR variants are candidates for folding.
1249 unsigned Opc = 0;
1250 const TargetRegisterClass *RC = nullptr;
1251 bool TryFold = false;
1252 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
1253 RC = &AArch64::GPR64RegClass;
1254 Opc = AArch64::CSELXr;
1255 TryFold = true;
1256 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
1257 RC = &AArch64::GPR32RegClass;
1258 Opc = AArch64::CSELWr;
1259 TryFold = true;
1260 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
1261 RC = &AArch64::FPR64RegClass;
1262 Opc = AArch64::FCSELDrrr;
1263 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
1264 RC = &AArch64::FPR32RegClass;
1265 Opc = AArch64::FCSELSrrr;
1266 }
1267 assert(RC && "Unsupported regclass");
1268
1269 // Try folding simple instructions into the csel.
1270 if (TryFold) {
1271 unsigned NewReg = 0;
1272 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
1273 if (FoldedOpc) {
1274 // The folded opcodes csinc, csinv and csneg apply the operation to
1275 // FalseReg, so we need to invert the condition.
1277 TrueReg = FalseReg;
1278 } else
1279 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1280
1281 // Fold the operation. Leave any dead instructions for DCE to clean up.
1282 if (FoldedOpc) {
1283 FalseReg = NewReg;
1284 Opc = FoldedOpc;
1285 // Extend the live range of NewReg.
1286 MRI.clearKillFlags(NewReg);
1287 }
1288 }
1289
1290 // Pull all virtual registers into the appropriate class.
1291 MRI.constrainRegClass(TrueReg, RC);
1292 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1293 assert(
1294 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1295 FalseReg == AArch64::XZR) &&
1296 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1297 if (FalseReg.isVirtual())
1298 MRI.constrainRegClass(FalseReg, RC);
1299
1300 // Insert the csel.
1301 BuildMI(MBB, I, DL, get(Opc), DstReg)
1302 .addReg(TrueReg)
1303 .addReg(FalseReg)
1304 .addImm(CC);
1305}
1306
1307 // Return true if Imm can be loaded into a register by a "cheap" sequence of
1308 // instructions. For now, "cheap" means at most two instructions.
// The immediate is read from operand 1 of MI. Every 32-bit immediate is
// reported as cheap without further inspection; a 64-bit immediate is cheap
// when AArch64_IMM::expandMOVImm() produces at most two entries. BitSize must
// be 32 or 64 (asserted).
1309 static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1310 if (BitSize == 32)
1311 return true;
1312
1313 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1314 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
// NOTE(review): the declaration of `Is` (listing line 1315) is missing from
// this listing; it is the container that receives the expansion.
1316 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1317
1318 return Is.size() <= 2;
1319}
1320
1321// Check if a COPY instruction is cheap.
1322static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
1323 assert(MI.isCopy() && "Expected COPY instruction");
1324 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1325
1326 // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64,
1327 // typically requiring an FMOV instruction with a 2-6 cycle latency.
1328 auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
1329 if (Reg.isVirtual())
1330 return MRI.getRegClass(Reg);
1331 if (Reg.isPhysical())
1332 return RI.getMinimalPhysRegClass(Reg);
1333 return nullptr;
1334 };
1335 const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
1336 const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
1337 if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
1338 return false;
1339
1340 return MI.isAsCheapAsAMove();
1341}
1342
1343 // FIXME: this implementation should be micro-architecture dependent, so a
1344 // micro-architecture target hook should be introduced here in future.
// NOTE(review): the signature line of this function (listing line 1345) is
// missing from this listing; the body operates on an instruction `MI`.
//
// Returns whether MI should be treated as being as cheap as a move. Opcodes
// not special-cased below fall back to MachineInstr::isAsCheapAsAMove().
1346 if (Subtarget.hasExynosCheapAsMoveHandling()) {
// With Exynos handling enabled, only the Exynos predicate and the generic
// answer are consulted; the opcode switch below is skipped.
1347 if (isExynosCheapAsMove(MI))
1348 return true;
1349 return MI.isAsCheapAsAMove();
1350 }
1351
1352 switch (MI.getOpcode()) {
1353 default:
1354 return MI.isAsCheapAsAMove();
1355
1356 case TargetOpcode::COPY:
1357 return isCheapCopy(MI, RI);
1358
// Shifted-register add/sub: cheap only when the subtarget reports
// hasALULSLFast() and the operand-3 immediate is at most 4.
1359 case AArch64::ADDWrs:
1360 case AArch64::ADDXrs:
1361 case AArch64::SUBWrs:
1362 case AArch64::SUBXrs:
1363 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1364
1365 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1366 // ORRXri, it is as cheap as MOV.
1367 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1368 case AArch64::MOVi32imm:
1369 return isCheapImmediate(MI, 32);
1370 case AArch64::MOVi64imm:
1371 return isCheapImmediate(MI, 64);
1372 }
1373}
1374
1375bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1376 switch (MI.getOpcode()) {
1377 default:
1378 return false;
1379
1380 case AArch64::ADDWrs:
1381 case AArch64::ADDXrs:
1382 case AArch64::ADDSWrs:
1383 case AArch64::ADDSXrs: {
1384 unsigned Imm = MI.getOperand(3).getImm();
1385 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1386 if (ShiftVal == 0)
1387 return true;
1388 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1389 }
1390
1391 case AArch64::ADDWrx:
1392 case AArch64::ADDXrx:
1393 case AArch64::ADDXrx64:
1394 case AArch64::ADDSWrx:
1395 case AArch64::ADDSXrx:
1396 case AArch64::ADDSXrx64: {
1397 unsigned Imm = MI.getOperand(3).getImm();
1398 switch (AArch64_AM::getArithExtendType(Imm)) {
1399 default:
1400 return false;
1401 case AArch64_AM::UXTB:
1402 case AArch64_AM::UXTH:
1403 case AArch64_AM::UXTW:
1404 case AArch64_AM::UXTX:
1405 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1406 }
1407 }
1408
1409 case AArch64::SUBWrs:
1410 case AArch64::SUBSWrs: {
1411 unsigned Imm = MI.getOperand(3).getImm();
1412 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1413 return ShiftVal == 0 ||
1414 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1415 }
1416
1417 case AArch64::SUBXrs:
1418 case AArch64::SUBSXrs: {
1419 unsigned Imm = MI.getOperand(3).getImm();
1420 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1421 return ShiftVal == 0 ||
1422 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1423 }
1424
1425 case AArch64::SUBWrx:
1426 case AArch64::SUBXrx:
1427 case AArch64::SUBXrx64:
1428 case AArch64::SUBSWrx:
1429 case AArch64::SUBSXrx:
1430 case AArch64::SUBSXrx64: {
1431 unsigned Imm = MI.getOperand(3).getImm();
1432 switch (AArch64_AM::getArithExtendType(Imm)) {
1433 default:
1434 return false;
1435 case AArch64_AM::UXTB:
1436 case AArch64_AM::UXTH:
1437 case AArch64_AM::UXTW:
1438 case AArch64_AM::UXTX:
1439 return AArch64_AM::getArithShiftValue(Imm) == 0;
1440 }
1441 }
1442
1443 case AArch64::LDRBBroW:
1444 case AArch64::LDRBBroX:
1445 case AArch64::LDRBroW:
1446 case AArch64::LDRBroX:
1447 case AArch64::LDRDroW:
1448 case AArch64::LDRDroX:
1449 case AArch64::LDRHHroW:
1450 case AArch64::LDRHHroX:
1451 case AArch64::LDRHroW:
1452 case AArch64::LDRHroX:
1453 case AArch64::LDRQroW:
1454 case AArch64::LDRQroX:
1455 case AArch64::LDRSBWroW:
1456 case AArch64::LDRSBWroX:
1457 case AArch64::LDRSBXroW:
1458 case AArch64::LDRSBXroX:
1459 case AArch64::LDRSHWroW:
1460 case AArch64::LDRSHWroX:
1461 case AArch64::LDRSHXroW:
1462 case AArch64::LDRSHXroX:
1463 case AArch64::LDRSWroW:
1464 case AArch64::LDRSWroX:
1465 case AArch64::LDRSroW:
1466 case AArch64::LDRSroX:
1467 case AArch64::LDRWroW:
1468 case AArch64::LDRWroX:
1469 case AArch64::LDRXroW:
1470 case AArch64::LDRXroX:
1471 case AArch64::PRFMroW:
1472 case AArch64::PRFMroX:
1473 case AArch64::STRBBroW:
1474 case AArch64::STRBBroX:
1475 case AArch64::STRBroW:
1476 case AArch64::STRBroX:
1477 case AArch64::STRDroW:
1478 case AArch64::STRDroX:
1479 case AArch64::STRHHroW:
1480 case AArch64::STRHHroX:
1481 case AArch64::STRHroW:
1482 case AArch64::STRHroX:
1483 case AArch64::STRQroW:
1484 case AArch64::STRQroX:
1485 case AArch64::STRSroW:
1486 case AArch64::STRSroX:
1487 case AArch64::STRWroW:
1488 case AArch64::STRWroX:
1489 case AArch64::STRXroW:
1490 case AArch64::STRXroX: {
1491 unsigned IsSigned = MI.getOperand(3).getImm();
1492 return !IsSigned;
1493 }
1494 }
1495}
1496
1497bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1498 unsigned Opc = MI.getOpcode();
1499 switch (Opc) {
1500 default:
1501 return false;
1502 case AArch64::SEH_StackAlloc:
1503 case AArch64::SEH_SaveFPLR:
1504 case AArch64::SEH_SaveFPLR_X:
1505 case AArch64::SEH_SaveReg:
1506 case AArch64::SEH_SaveReg_X:
1507 case AArch64::SEH_SaveRegP:
1508 case AArch64::SEH_SaveRegP_X:
1509 case AArch64::SEH_SaveFReg:
1510 case AArch64::SEH_SaveFReg_X:
1511 case AArch64::SEH_SaveFRegP:
1512 case AArch64::SEH_SaveFRegP_X:
1513 case AArch64::SEH_SetFP:
1514 case AArch64::SEH_AddFP:
1515 case AArch64::SEH_Nop:
1516 case AArch64::SEH_PrologEnd:
1517 case AArch64::SEH_EpilogStart:
1518 case AArch64::SEH_EpilogEnd:
1519 case AArch64::SEH_PACSignLR:
1520 case AArch64::SEH_SaveAnyRegI:
1521 case AArch64::SEH_SaveAnyRegIP:
1522 case AArch64::SEH_SaveAnyRegQP:
1523 case AArch64::SEH_SaveAnyRegQPX:
1524 case AArch64::SEH_AllocZ:
1525 case AArch64::SEH_SaveZReg:
1526 case AArch64::SEH_SavePReg:
1527 return true;
1528 }
1529}
1530
// NOTE(review): the line carrying this function's return type, name and first
// parameter (listing line 1531) is missing from this listing; the body
// operates on an instruction `MI`.
//
// Recognizes SBFMXri/UBFMXri whose immediates are (0, 31), i.e. the 32 -> 64
// bit sign/zero extension. On success sets SrcReg to operand 1, DstReg to
// operand 0 and SubIdx to AArch64::sub_32, and returns true. Returns false for
// every other instruction, leaving the outputs untouched.
1532 Register &SrcReg, Register &DstReg,
1533 unsigned &SubIdx) const {
1534 switch (MI.getOpcode()) {
1535 default:
1536 return false;
1537 case AArch64::SBFMXri: // aka sxtw
1538 case AArch64::UBFMXri: // aka uxtw
1539 // Check for the 32 -> 64 bit extension case, these instructions can do
1540 // much more.
1541 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1542 return false;
1543 // This is a signed or unsigned 32 -> 64 bit extension.
1544 SrcReg = MI.getOperand(1).getReg();
1545 DstReg = MI.getOperand(0).getReg();
1546 SubIdx = AArch64::sub_32;
1547 return true;
1548 }
1549}
1550
// NOTE(review): this listing is missing the line with the function's return
// type and name (1551), the declaration of `TRI` (1553) and the condition that
// guards the first `return false` (1562-1563).
//
// Returns true only when MIa and MIb can be shown not to overlap: both must
// decompose into base + offset + width, use identical base operands and agree
// on offset scalability, and the lower access must end at or before the start
// of the higher one. Any case that cannot be analyzed returns false.
1552 const MachineInstr &MIa, const MachineInstr &MIb) const {
1554 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1555 int64_t OffsetA = 0, OffsetB = 0;
1556 TypeSize WidthA(0, false), WidthB(0, false);
1557 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1558
1559 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1560 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1561
1564 return false;
1565
1566 // Retrieve the base, offset from the base and width. Width
1567 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1568 // base are identical, and the offset of a lower memory access +
1569 // the width doesn't overlap the offset of a higher memory access,
1570 // then the memory accesses are different.
1571 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1572 // are assumed to have the same scale (vscale).
1573 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1574 WidthA, TRI) &&
1575 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1576 WidthB, TRI)) {
1577 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1578 OffsetAIsScalable == OffsetBIsScalable) {
// NOTE(review): OffsetA/OffsetB are int64_t but are narrowed to `int` here;
// confirm the offsets always fit.
1579 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1580 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1581 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
// The width of the lower access must have the same scalability as the offsets
// for the addition below to be meaningful.
1582 if (LowWidth.isScalable() == OffsetAIsScalable &&
1583 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1584 return true;
1585 }
1586 }
1587 return false;
1588}
1589
// NOTE(review): the line with this function's return type, name and first
// parameter (listing line 1590) and the condition guarding the first
// `return true` (1593) are missing from this listing.
//
// Returns true when MI must be treated as a scheduling boundary. The visible
// AArch64-specific cases are: instructions with BTI semantics, the CSDB hint,
// DSB, ISB, MSRpstatesvcrImm1 (SMSTART/SMSTOP), SEH instructions, and any
// instruction immediately followed in MBB by a CFI instruction.
1591 const MachineBasicBlock *MBB,
1592 const MachineFunction &MF) const {
1594 return true;
1595
1596 // Do not move an instruction that can be recognized as a branch target.
1597 if (hasBTISemantics(MI))
1598 return true;
1599
1600 switch (MI.getOpcode()) {
1601 case AArch64::HINT:
1602 // CSDB hints are scheduling barriers.
1603 if (MI.getOperand(0).getImm() == 0x14)
1604 return true;
1605 break;
1606 case AArch64::DSB:
1607 case AArch64::ISB:
1608 // DSB and ISB also are scheduling barriers.
1609 return true;
1610 case AArch64::MSRpstatesvcrImm1:
1611 // SMSTART and SMSTOP are also scheduling barriers.
1612 return true;
1613 default:;
1614 }
1615 if (isSEHInstruction(MI))
1616 return true;
// Finally, treat MI as a boundary when the next instruction in the block is a
// CFI instruction.
1617 auto Next = std::next(MI.getIterator());
1618 return Next != MBB->end() && Next->isCFIInstruction();
1619}
1620
1621 /// analyzeCompare - For a comparison instruction, return the source registers
1622 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1623 /// Return true if the comparison instruction can be analyzed.
/// CmpMask is set to ~0 in every analyzable case. SrcReg2 is 0 for the
/// immediate forms.
// NOTE(review): the line with this function's return type, name and leading
// parameters (listing line 1624) and the start of the CmpValue assignment in
// the ANDS case (1688) are missing from this listing.
1625 Register &SrcReg2, int64_t &CmpMask,
1626 int64_t &CmpValue) const {
1627 // The first operand can be a frame index where we'd normally expect a
1628 // register.
1629 // FIXME: Pass subregisters out of analyzeCompare
1630 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1631 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1632 return false;
1633
1634 switch (MI.getOpcode()) {
1635 default:
1636 break;
1637 case AArch64::PTEST_PP:
1638 case AArch64::PTEST_PP_ANY:
1639 case AArch64::PTEST_PP_FIRST:
// For PTEST the two reported registers are operands 0 and 1.
1640 SrcReg = MI.getOperand(0).getReg();
1641 SrcReg2 = MI.getOperand(1).getReg();
1642 if (MI.getOperand(2).getSubReg())
1643 return false;
1644
1645 // Not sure about the mask and value for now...
1646 CmpMask = ~0;
1647 CmpValue = 0;
1648 return true;
1649 case AArch64::SUBSWrr:
1650 case AArch64::SUBSWrs:
1651 case AArch64::SUBSWrx:
1652 case AArch64::SUBSXrr:
1653 case AArch64::SUBSXrs:
1654 case AArch64::SUBSXrx:
1655 case AArch64::ADDSWrr:
1656 case AArch64::ADDSWrs:
1657 case AArch64::ADDSWrx:
1658 case AArch64::ADDSXrr:
1659 case AArch64::ADDSXrs:
1660 case AArch64::ADDSXrx:
1661 // Register-register forms: report both source registers, no immediate.
1662 SrcReg = MI.getOperand(1).getReg();
1663 SrcReg2 = MI.getOperand(2).getReg();
1664
1665 // FIXME: Pass subregisters out of analyzeCompare
1666 if (MI.getOperand(2).getSubReg())
1667 return false;
1668
1669 CmpMask = ~0;
1670 CmpValue = 0;
1671 return true;
1672 case AArch64::SUBSWri:
1673 case AArch64::ADDSWri:
1674 case AArch64::SUBSXri:
1675 case AArch64::ADDSXri:
// Register-immediate forms: the immediate in operand 2 is reported as is.
1676 SrcReg = MI.getOperand(1).getReg();
1677 SrcReg2 = 0;
1678 CmpMask = ~0;
1679 CmpValue = MI.getOperand(2).getImm();
1680 return true;
1681 case AArch64::ANDSWri:
1682 case AArch64::ANDSXri:
1683 // ANDS does not use the same encoding scheme as the other xxxS
1684 // instructions.
1685 SrcReg = MI.getOperand(1).getReg();
1686 SrcReg2 = 0;
1687 CmpMask = ~0;
1689 MI.getOperand(2).getImm(),
1690 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1691 return true;
1692 }
1693
1694 return false;
1695}
1696
// NOTE(review): the signature line of this function (listing line 1697) and
// the declarations of `TII` and `TRI` (1702-1703) are missing from this
// listing; the body operates on an instruction `Instr`.
//
// Checks every operand of Instr against the register class constraint reported
// for it by getRegClassConstraint(). A physical register must be contained in
// the constraint class. A virtual register must already be in a sub-class of
// it or be constrainable to it (its class is narrowed in that case). Returns
// false on the first operand that cannot satisfy its constraint, true
// otherwise.
1698 MachineBasicBlock *MBB = Instr.getParent();
1699 assert(MBB && "Can't get MachineBasicBlock here");
1700 MachineFunction *MF = MBB->getParent();
1701 assert(MF && "Can't get MachineFunction here");
1704 MachineRegisterInfo *MRI = &MF->getRegInfo();
1705
1706 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1707 ++OpIdx) {
1708 MachineOperand &MO = Instr.getOperand(OpIdx);
1709 const TargetRegisterClass *OpRegCstraints =
1710 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1711
1712 // If there's no constraint, there's nothing to do.
1713 if (!OpRegCstraints)
1714 continue;
1715 // If the operand is a frame index, there's nothing to do here.
1716 // A frame index operand will resolve correctly during PEI.
1717 if (MO.isFI())
1718 continue;
1719
1720 assert(MO.isReg() &&
1721 "Operand has register constraints without being a register!");
1722
1723 Register Reg = MO.getReg();
1724 if (Reg.isPhysical()) {
1725 if (!OpRegCstraints->contains(Reg))
1726 return false;
// Virtual register: only try to constrain when its current class is not
// already a sub-class of the required one.
1727 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1728 !MRI->constrainRegClass(Reg, OpRegCstraints))
1729 return false;
1730 }
1731
1732 return true;
1733}
1734
1735 /// Return the opcode that does not set flags when possible - otherwise
1736 /// return the original opcode. The caller is responsible to do the actual
1737 /// substitution and legality checking.
/// Opcodes other than the ADDS/SUBS forms listed below are returned unchanged.
// NOTE(review): the signature line of this function (listing line 1738) is
// missing from this listing; the body operates on an instruction `MI`.
1739 // Don't convert all compare instructions, because for some the zero register
1740 // encoding becomes the sp register.
1741 bool MIDefinesZeroReg = false;
1742 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1743 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1744 MIDefinesZeroReg = true;
1745
// The rr and rx forms are always converted. The ri and rs forms keep their
// flag-setting opcode when MI defines WZR/XZR.
1746 switch (MI.getOpcode()) {
1747 default:
1748 return MI.getOpcode();
1749 case AArch64::ADDSWrr:
1750 return AArch64::ADDWrr;
1751 case AArch64::ADDSWri:
1752 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1753 case AArch64::ADDSWrs:
1754 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1755 case AArch64::ADDSWrx:
1756 return AArch64::ADDWrx;
1757 case AArch64::ADDSXrr:
1758 return AArch64::ADDXrr;
1759 case AArch64::ADDSXri:
1760 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1761 case AArch64::ADDSXrs:
1762 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1763 case AArch64::ADDSXrx:
1764 return AArch64::ADDXrx;
1765 case AArch64::SUBSWrr:
1766 return AArch64::SUBWrr;
1767 case AArch64::SUBSWri:
1768 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1769 case AArch64::SUBSWrs:
1770 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1771 case AArch64::SUBSWrx:
1772 return AArch64::SUBWrx;
1773 case AArch64::SUBSXrr:
1774 return AArch64::SUBXrr;
1775 case AArch64::SUBSXri:
1776 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1777 case AArch64::SUBSXrs:
1778 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1779 case AArch64::SUBSXrx:
1780 return AArch64::SUBXrx;
1781 }
1782}
1783
/// Bit mask selecting which kind of NZCV access a query should look for.
enum AccessKind {
  AK_Write = 0x01,             ///< Look for instructions that modify NZCV.
  AK_Read = 0x10,              ///< Look for instructions that read NZCV.
  AK_All = AK_Write | AK_Read  ///< Look for both kinds (0x11).
};
1785
1786 /// True when condition flags are accessed (either by writing or reading)
1787 /// on the instruction trace starting at From and ending at To.
1788 ///
1789 /// Note: If From and To are from different blocks it's assumed CC are accessed
1790 /// on the path.
/// The same conservative answer (true) is given when To is the first
/// instruction of its block. \p AccessToCheck selects whether writes
/// (AK_Write), reads (AK_Read) or both (AK_All, the default) of NZCV count.
// NOTE(review): the lines with this function's return type, name and leading
// parameters (listing lines 1791-1792) and the range expression of the loop
// below (1810) are missing from this listing.
1793 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1794 // Early exit if To is at the beginning of the BB.
1795 if (To == To->getParent()->begin())
1796 return true;
1797
1798 // Check whether the instructions are in the same basic block
1799 // If not, assume the condition flags might get modified somewhere.
1800 if (To->getParent() != From->getParent())
1801 return true;
1802
1803 // From must be above To.
1804 assert(std::any_of(
1805 ++To.getReverse(), To->getParent()->rend(),
1806 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1807
1808 // We iterate backward starting at \p To until we hit \p From.
1809 for (const MachineInstr &Instr :
1811 if (((AccessToCheck & AK_Write) &&
1812 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1813 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1814 return true;
1815 }
1816 return false;
1817}
1818
1819std::optional<unsigned>
1820AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1821 MachineInstr *Pred,
1822 const MachineRegisterInfo *MRI) const {
1823 unsigned MaskOpcode = Mask->getOpcode();
1824 unsigned PredOpcode = Pred->getOpcode();
1825 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1826 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1827
1828 if (PredIsWhileLike) {
1829 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1830 // instruction and the condition is "any" since WHILcc does an implicit
1831 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1832 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1833 return PredOpcode;
1834
1835 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1836 // redundant since WHILE performs an implicit PTEST with an all active
1837 // mask.
1838 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1839 getElementSizeForOpcode(MaskOpcode) ==
1840 getElementSizeForOpcode(PredOpcode))
1841 return PredOpcode;
1842
1843 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1844 // WHILEcc performs an implicit PTEST with an all active mask, setting
1845 // the N flag as the PTEST_FIRST would.
1846 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1847 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1848 return PredOpcode;
1849
1850 return {};
1851 }
1852
1853 if (PredIsPTestLike) {
1854 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1855 // instruction that sets the flags as PTEST would and the condition is
1856 // "any" since PG is always a subset of the governing predicate of the
1857 // ptest-like instruction.
1858 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1859 return PredOpcode;
1860
1861 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1862
1863 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1864 // to look through a copy and try again. This is because some instructions
1865 // take a predicate whose register class is a subset of its result class.
1866 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1867 PTestLikeMask->getOperand(1).getReg().isVirtual())
1868 PTestLikeMask =
1869 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1870
1871 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1872 // the element size matches and either the PTEST_LIKE instruction uses
1873 // the same all active mask or the condition is "any".
1874 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1875 getElementSizeForOpcode(MaskOpcode) ==
1876 getElementSizeForOpcode(PredOpcode)) {
1877 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1878 return PredOpcode;
1879 }
1880
1881 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1882 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1883 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1884 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1885 // performed by the compare could consider fewer lanes for these element
1886 // sizes.
1887 //
1888 // For example, consider
1889 //
1890 // ptrue p0.b ; P0=1111-1111-1111-1111
1891 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1892 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1893 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1894 // ; ^ last active
1895 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1896 // ; ^ last active
1897 //
1898 // where the compare generates a canonical all active 32-bit predicate
1899 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1900 // active flag, whereas the PTEST instruction with the same mask doesn't.
1901 // For PTEST_ANY this doesn't apply as the flags in this case would be
1902 // identical regardless of element size.
1903 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1904 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1905 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1906 return PredOpcode;
1907
1908 return {};
1909 }
1910
1911 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1912 // opcode so the PTEST becomes redundant.
1913 switch (PredOpcode) {
1914 case AArch64::AND_PPzPP:
1915 case AArch64::BIC_PPzPP:
1916 case AArch64::EOR_PPzPP:
1917 case AArch64::NAND_PPzPP:
1918 case AArch64::NOR_PPzPP:
1919 case AArch64::ORN_PPzPP:
1920 case AArch64::ORR_PPzPP:
1921 case AArch64::BRKA_PPzP:
1922 case AArch64::BRKPA_PPzPP:
1923 case AArch64::BRKB_PPzP:
1924 case AArch64::BRKPB_PPzPP:
1925 case AArch64::RDFFR_PPz: {
1926 // Check to see if our mask is the same. If not the resulting flag bits
1927 // may be different and we can't remove the ptest.
1928 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1929 if (Mask != PredMask)
1930 return {};
1931 break;
1932 }
1933 case AArch64::BRKN_PPzP: {
1934 // BRKN uses an all active implicit mask to set flags unlike the other
1935 // flag-setting instructions.
1936 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1937 if ((MaskOpcode != AArch64::PTRUE_B) ||
1938 (Mask->getOperand(1).getImm() != 31))
1939 return {};
1940 break;
1941 }
1942 case AArch64::PTRUE_B:
1943 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1944 break;
1945 default:
1946 // Bail out if we don't recognize the input
1947 return {};
1948 }
1949
1950 return convertToFlagSettingOpc(PredOpcode);
1951}
1952
1953/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1954/// operation which could set the flags in an identical manner
1955bool AArch64InstrInfo::optimizePTestInstr(
1956 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1957 const MachineRegisterInfo *MRI) const {
1958 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1959 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1960
1961 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1962 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1963 // before the branch to extract each subregister.
1964 auto Op = Pred->getOperand(1);
1965 if (Op.isReg() && Op.getReg().isVirtual() &&
1966 Op.getSubReg() == AArch64::psub0)
1967 Pred = MRI->getUniqueVRegDef(Op.getReg());
1968 }
1969
1970 unsigned PredOpcode = Pred->getOpcode();
1971 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1972 if (!NewOp)
1973 return false;
1974
1975 const TargetRegisterInfo *TRI = &getRegisterInfo();
1976
1977 // If another instruction between Pred and PTest accesses flags, don't remove
1978 // the ptest or update the earlier instruction to modify them.
1979 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1980 return false;
1981
1982 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1983 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1984 // operand to be replaced with an equivalent instruction that also sets the
1985 // flags.
1986 PTest->eraseFromParent();
1987 if (*NewOp != PredOpcode) {
1988 Pred->setDesc(get(*NewOp));
1989 bool succeeded = UpdateOperandRegClass(*Pred);
1990 (void)succeeded;
1991 assert(succeeded && "Operands have incompatible register classes!");
1992 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1993 }
1994
1995 // Ensure that the flags def is live.
1996 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1997 unsigned i = 0, e = Pred->getNumOperands();
1998 for (; i != e; ++i) {
1999 MachineOperand &MO = Pred->getOperand(i);
2000 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
2001 MO.setIsDead(false);
2002 break;
2003 }
2004 }
2005 }
2006 return true;
2007}
2008
2009 /// Try to optimize a compare instruction. A compare instruction is an
2010 /// instruction which produces AArch64::NZCV. It is a pure compare
2011 /// instruction
2012 /// when there are no uses of its destination register.
2013 ///
2014 /// The following steps are tried in order:
2015 /// 1. Erase CmpInstr or convert it into its non-flag-setting form when its
/// NZCV def is dead.
/// 2. Hand PTEST_PP / PTEST_PP_ANY / PTEST_PP_FIRST to optimizePTestInstr.
2016 /// 3. Remove CmpInstr if above there is an instruction producing a needed
2017 /// condition code or an instruction which can be converted into such an
2018 /// instruction.
2019 /// Only comparisons with zero (and, for the csinc pattern, one) are supported.
///
/// \returns true if CmpInstr was erased or rewritten, false otherwise.
// NOTE(review): the first line of the signature (listing line 2020) is missing
// from this copy of the file.
2021 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
2022 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
2023 assert(CmpInstr.getParent());
2024 assert(MRI);
2025
2026 // Replace SUBSWrr with SUBWrr if NZCV is not used.
2027 int DeadNZCVIdx =
2028 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
2029 if (DeadNZCVIdx != -1) {
// Flags are dead and the result goes to the zero register: the instruction
// has no remaining effect, so drop it entirely.
2030 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
2031 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
2032 CmpInstr.eraseFromParent();
2033 return true;
2034 }
2035 unsigned Opc = CmpInstr.getOpcode();
2036 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
// No distinct non-flag-setting opcode was returned; leave CmpInstr alone.
2037 if (NewOpc == Opc)
2038 return false;
2039 const MCInstrDesc &MCID = get(NewOpc);
2040 CmpInstr.setDesc(MCID);
2041 CmpInstr.removeOperand(DeadNZCVIdx);
2042 bool succeeded = UpdateOperandRegClass(CmpInstr);
2043 (void)succeeded;
2044 assert(succeeded && "Some operands reg class are incompatible!");
2045 return true;
2046 }
2047
2048 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
2049 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
2050 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
2051 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
2052
// The remaining folds only handle comparisons without a second source
// register.
2053 if (SrcReg2 != 0)
2054 return false;
2055
2056 // CmpInstr is a Compare instruction if destination register is not used.
2057 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
2058 return false;
2059
2060 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
2061 return true;
2062 return (CmpValue == 0 || CmpValue == 1) &&
2063 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
2064 }
2065
2066/// Get opcode of S version of Instr.
2067/// If Instr is S version its opcode is returned.
2068/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
2069/// or we are not interested in it.
2070static unsigned sForm(MachineInstr &Instr) {
2071 switch (Instr.getOpcode()) {
2072 default:
2073 return AArch64::INSTRUCTION_LIST_END;
2074
2075 case AArch64::ADDSWrr:
2076 case AArch64::ADDSWri:
2077 case AArch64::ADDSXrr:
2078 case AArch64::ADDSXri:
2079 case AArch64::ADDSWrx:
2080 case AArch64::ADDSXrx:
2081 case AArch64::ADDSWrs:
2082 case AArch64::ADDSXrs:
2083 case AArch64::SUBSWrr:
2084 case AArch64::SUBSWri:
2085 case AArch64::SUBSWrx:
2086 case AArch64::SUBSWrs:
2087 case AArch64::SUBSXrr:
2088 case AArch64::SUBSXri:
2089 case AArch64::SUBSXrx:
2090 case AArch64::SUBSXrs:
2091 case AArch64::ANDSWri:
2092 case AArch64::ANDSWrr:
2093 case AArch64::ANDSWrs:
2094 case AArch64::ANDSXri:
2095 case AArch64::ANDSXrr:
2096 case AArch64::ANDSXrs:
2097 case AArch64::BICSWrr:
2098 case AArch64::BICSXrr:
2099 case AArch64::BICSWrs:
2100 case AArch64::BICSXrs:
2101 case AArch64::ADCSWr:
2102 case AArch64::ADCSXr:
2103 case AArch64::SBCSWr:
2104 case AArch64::SBCSXr:
2105 return Instr.getOpcode();
2106
2107 case AArch64::ADDWrr:
2108 return AArch64::ADDSWrr;
2109 case AArch64::ADDWri:
2110 return AArch64::ADDSWri;
2111 case AArch64::ADDXrr:
2112 return AArch64::ADDSXrr;
2113 case AArch64::ADDXri:
2114 return AArch64::ADDSXri;
2115 case AArch64::ADDWrx:
2116 return AArch64::ADDSWrx;
2117 case AArch64::ADDXrx:
2118 return AArch64::ADDSXrx;
2119 case AArch64::ADDWrs:
2120 return AArch64::ADDSWrs;
2121 case AArch64::ADDXrs:
2122 return AArch64::ADDSXrs;
2123 case AArch64::ADCWr:
2124 return AArch64::ADCSWr;
2125 case AArch64::ADCXr:
2126 return AArch64::ADCSXr;
2127 case AArch64::SUBWrr:
2128 return AArch64::SUBSWrr;
2129 case AArch64::SUBWri:
2130 return AArch64::SUBSWri;
2131 case AArch64::SUBXrr:
2132 return AArch64::SUBSXrr;
2133 case AArch64::SUBXri:
2134 return AArch64::SUBSXri;
2135 case AArch64::SUBWrx:
2136 return AArch64::SUBSWrx;
2137 case AArch64::SUBXrx:
2138 return AArch64::SUBSXrx;
2139 case AArch64::SUBWrs:
2140 return AArch64::SUBSWrs;
2141 case AArch64::SUBXrs:
2142 return AArch64::SUBSXrs;
2143 case AArch64::SBCWr:
2144 return AArch64::SBCSWr;
2145 case AArch64::SBCXr:
2146 return AArch64::SBCSXr;
2147 case AArch64::ANDWri:
2148 return AArch64::ANDSWri;
2149 case AArch64::ANDXri:
2150 return AArch64::ANDSXri;
2151 case AArch64::ANDWrr:
2152 return AArch64::ANDSWrr;
2153 case AArch64::ANDWrs:
2154 return AArch64::ANDSWrs;
2155 case AArch64::ANDXrr:
2156 return AArch64::ANDSXrr;
2157 case AArch64::ANDXrs:
2158 return AArch64::ANDSXrs;
2159 case AArch64::BICWrr:
2160 return AArch64::BICSWrr;
2161 case AArch64::BICXrr:
2162 return AArch64::BICSXrr;
2163 case AArch64::BICWrs:
2164 return AArch64::BICSWrs;
2165 case AArch64::BICXrs:
2166 return AArch64::BICSXrs;
2167 }
2168}
2169
2170 /// Check if AArch64::NZCV should be alive in successors of MBB.
/// \returns true as soon as one successor block lists NZCV among its live-ins,
/// false if none does.
// NOTE(review): the signature line (listing line 2171) is missing from this
// copy; callers in this file invoke it as areCFlagsAliveInSuccessors(MBB).
2172 for (auto *BB : MBB->successors())
2173 if (BB->isLiveIn(AArch64::NZCV))
2174 return true;
2175 return false;
2176 }
2177
2178/// \returns The condition code operand index for \p Instr if it is a branch
2179/// or select and -1 otherwise.
2180int AArch64InstrInfo::findCondCodeUseOperandIdxForBranchOrSelect(
2181 const MachineInstr &Instr) {
2182 switch (Instr.getOpcode()) {
2183 default:
2184 return -1;
2185
2186 case AArch64::Bcc: {
2187 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2188 assert(Idx >= 2);
2189 return Idx - 2;
2190 }
2191
2192 case AArch64::CSINVWr:
2193 case AArch64::CSINVXr:
2194 case AArch64::CSINCWr:
2195 case AArch64::CSINCXr:
2196 case AArch64::CSELWr:
2197 case AArch64::CSELXr:
2198 case AArch64::CSNEGWr:
2199 case AArch64::CSNEGXr:
2200 case AArch64::FCSELSrrr:
2201 case AArch64::FCSELDrrr: {
2202 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2203 assert(Idx >= 1);
2204 return Idx - 1;
2205 }
2206 }
2207}
2208
2209 /// Find a condition code used by the instruction.
2210 /// Returns AArch64CC::Invalid if either the instruction does not use condition
2211 /// codes or we don't optimize CmpInstr in the presence of such instructions.
///
/// The condition code is read from the immediate operand located by
/// findCondCodeUseOperandIdxForBranchOrSelect, so only the branch and select
/// opcodes that helper recognises yield a valid code.
// NOTE(review): the signature line (listing line 2212) and the else-arm of the
// conditional expression (listing line 2217) are missing from this copy.
2213 int CCIdx =
2214 AArch64InstrInfo::findCondCodeUseOperandIdxForBranchOrSelect(Instr);
2215 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
2216 Instr.getOperand(CCIdx).getImm())
2218 }
2219
/// Translate a condition code into the set of NZCV flags it reads.
///
/// \returns a UsedNZCV with the N, Z, C and/or V members set for each flag the
/// condition \p CC depends on; condition codes not listed below leave the
/// default-constructed value untouched.
// NOTE(review): the signature and the line after it (listing lines 2220-2221)
// are missing from this copy; callers in this file use the name getUsedNZCV.
2222 UsedNZCV UsedFlags;
2223 switch (CC) {
2224 default:
2225 break;
2226
2227 case AArch64CC::EQ: // Z set
2228 case AArch64CC::NE: // Z clear
2229 UsedFlags.Z = true;
2230 break;
2231
2232 case AArch64CC::HI: // Z clear and C set
2233 case AArch64CC::LS: // Z set or C clear
2234 UsedFlags.Z = true;
2235 [[fallthrough]];
2236 case AArch64CC::HS: // C set
2237 case AArch64CC::LO: // C clear
2238 UsedFlags.C = true;
2239 break;
2240
2241 case AArch64CC::MI: // N set
2242 case AArch64CC::PL: // N clear
2243 UsedFlags.N = true;
2244 break;
2245
2246 case AArch64CC::VS: // V set
2247 case AArch64CC::VC: // V clear
2248 UsedFlags.V = true;
2249 break;
2250
2251 case AArch64CC::GT: // Z clear and N and V the same
2252 case AArch64CC::LE: // Z set or N and V differ
2253 UsedFlags.Z = true;
2254 [[fallthrough]];
2255 case AArch64CC::GE: // N and V the same
2256 case AArch64CC::LT: // N and V differ
2257 UsedFlags.N = true;
2258 UsedFlags.V = true;
2259 break;
2260 }
2261 return UsedFlags;
2262 }
2263
2264 /// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
2265 /// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
2266 /// \returns std::nullopt otherwise, i.e. when \p MI and \p CmpInstr live in
/// different blocks, when NZCV is live into a successor, or when a reader of
/// NZCV has no condition code this file knows how to interpret.
2267 ///
2268 /// Collect instructions using those flags in \p CCUseInstrs if provided.
// NOTE(review): listing lines 2270 (rest of the signature), 2281 (head of the
// loop over the instructions following CmpInstr) and 2284 (the statement that
// defines CC for the current instruction) are missing from this copy.
2269 std::optional<UsedNZCV>
2271 const TargetRegisterInfo &TRI,
2272 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
2273 MachineBasicBlock *CmpParent = CmpInstr.getParent();
2274 if (MI.getParent() != CmpParent)
2275 return std::nullopt;
2276
2277 if (areCFlagsAliveInSuccessors(CmpParent))
2278 return std::nullopt;
2279
2280 UsedNZCV NZCVUsedAfterCmp;
2282 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
2283 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
2285 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
2286 return std::nullopt;
2287 NZCVUsedAfterCmp |= getUsedNZCV(CC);
2288 if (CCUseInstrs)
2289 CCUseInstrs->push_back(&Instr);
2290 }
// The first instruction that redefines NZCV ends the live range of the flags
// produced by CmpInstr; later readers see a different value.
2291 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
2292 break;
2293 }
2294 return NZCVUsedAfterCmp;
2295 }
2296
2297static bool isADDSRegImm(unsigned Opcode) {
2298 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
2299}
2300
2301static bool isSUBSRegImm(unsigned Opcode) {
2302 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
2303}
2304
/// \returns true if the S form of MI (as computed by sForm) is one of the
/// ANDS or BICS opcodes listed below, i.e. MI is an AND/BIC in either its
/// plain or its flag-setting variant.
// NOTE(review): the signature line (listing line 2305) is missing from this
// copy; canInstrSubstituteCmpInstr calls this as isANDOpcode(MI).
2306 unsigned Opc = sForm(MI);
2307 switch (Opc) {
2308 case AArch64::ANDSWri:
2309 case AArch64::ANDSWrr:
2310 case AArch64::ANDSWrs:
2311 case AArch64::ANDSXri:
2312 case AArch64::ANDSXrr:
2313 case AArch64::ANDSXrs:
2314 case AArch64::BICSWrr:
2315 case AArch64::BICSXrr:
2316 case AArch64::BICSWrs:
2317 case AArch64::BICSXrs:
2318 return true;
2319 default:
2320 return false;
2321 }
2322 }
2323
2324 /// Check if CmpInstr can be substituted by MI.
2325 ///
2326 /// CmpInstr can be substituted:
2327 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2328 /// - and, MI and CmpInstr are from the same MachineBB
2329 /// - and, condition flags are not alive in successors of the CmpInstr parent
2330 /// - and, if MI opcode is the S form there must be no defs of flags between
2331 /// MI and CmpInstr
2332 /// or if MI opcode is not the S form there must be neither defs of flags
2333 /// nor uses of flags between MI and CmpInstr.
2334 /// - and, C is not used after CmpInstr; CmpInstr's C is from adds/subs #0 on
2335 /// SrcReg and can differ from MI (e.g. carry out of ADCS/SBCS).
2336 /// - and, V is not used after CmpInstr unless MI is AND/BIC (V cleared) or MI
2337 /// has NoSWrap (overflow is poison and the fold is still safe).
// NOTE(review): the first line of the signature (listing line 2338) is missing
// from this copy.
2339 const TargetRegisterInfo &TRI) {
2340 // MI is an opcode sForm maps (add/sub/adc/sbc/and/bic and their S forms).
2341 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
2342
2343 const unsigned CmpOpcode = CmpInstr.getOpcode();
2344 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
2345 return false;
2346
2347 assert((CmpInstr.getOperand(2).isImm() &&
2348 CmpInstr.getOperand(2).getImm() == 0) &&
2349 "Caller guarantees that CmpInstr compares with constant 0");
2350
// A missing result means the flags escape the block or have a reader we cannot
// analyse; a used C flag can never be reproduced by MI.
2351 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
2352 if (!NZVCUsed || NZVCUsed->C)
2353 return false;
2354
2355 // CmpInstr is ADDS/SUBS with immediate 0 on SrcReg (compare SrcReg to zero).
2356 // After the fold, users see NZCV from MI (or its S form), not from CmpInstr.
2357 // N/Z match CmpInstr for the value in SrcReg; C/V need not match in general
2358 // (e.g. ADCS vs adds #0), so we require C unused after CmpInstr and gate V
2359 // as below. NoSWrap makes signed overflow poison; AND/BIC clear V.
2360 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
2361 return false;
2362
// An MI that is already in S form only needs the flags to stay unwritten up to
// CmpInstr. An MI that still has to be converted starts defining NZCV at its
// own position, so any access (read or write) in between blocks the fold.
2363 AccessKind AccessToCheck = AK_Write;
2364 if (sForm(MI) != MI.getOpcode())
2365 AccessToCheck = AK_All;
2366 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
2367 }
2368
2369 /// Substitute an instruction comparing to zero with another instruction
2370 /// which produces needed condition flags.
2371 ///
/// The unique definition of \p SrcReg is switched to its S form (see sForm)
/// and \p CmpInstr is erased, provided canInstrSubstituteCmpInstr allows it.
///
2372 /// Return true on success.
2373 bool AArch64InstrInfo::substituteCmpToZero(
2374 MachineInstr &CmpInstr, unsigned SrcReg,
2375 const MachineRegisterInfo &MRI) const {
2376 // Get the unique definition of SrcReg.
2377 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2378 if (!MI)
2379 return false;
2380
2381 const TargetRegisterInfo &TRI = getRegisterInfo();
2382
// Only instructions with a known S form can take over the flag computation.
2383 unsigned NewOpc = sForm(*MI);
2384 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2385 return false;
2386
2387 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2388 return false;
2389
2390 // Update the instruction to set NZCV.
2391 MI->setDesc(get(NewOpc));
2392 CmpInstr.eraseFromParent();
// NOTE(review): listing line 2393, which declares `succeeded`, is missing from
// this copy.
2394 (void)succeeded;
2395 assert(succeeded && "Some operands reg class are incompatible!");
2396 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2397 return true;
2398 }
2399
2400 /// \returns True if \p CmpInstr can be removed.
2401 ///
/// \p MI must be a CSINC of the zero register with itself (it materialises 0
/// or 1 from a condition) and \p CmpInstr a compare of that result with
/// \p CmpValue, which must be 0 or 1.
///
2402 /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2403 /// codes used in \p CCUseInstrs must be inverted.
// NOTE(review): listing lines 2404 and 2406 (parts of the signature) and 2424
// (the statement that defines MICC) are missing from this copy.
2405 int CmpValue, const TargetRegisterInfo &TRI,
2407 bool &IsInvertCC) {
2408 assert((CmpValue == 0 || CmpValue == 1) &&
2409 "Only comparisons to 0 or 1 considered for removal!");
2410
2411 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2412 unsigned MIOpc = MI.getOpcode();
2413 if (MIOpc == AArch64::CSINCWr) {
2414 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2415 MI.getOperand(2).getReg() != AArch64::WZR)
2416 return false;
2417 } else if (MIOpc == AArch64::CSINCXr) {
2418 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2419 MI.getOperand(2).getReg() != AArch64::XZR)
2420 return false;
2421 } else {
2422 return false;
2423 }
2425 if (MICC == AArch64CC::Invalid)
2426 return false;
2427
2428 // Bail out if MI carries an NZCV def operand matched by this query.
// NOTE(review): the trailing `true` presumably restricts the search to dead
// defs, as in optimizeCompareInstr's DeadNZCVIdx lookup - confirm.
2429 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2430 return false;
2431
2432 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2433 const unsigned CmpOpcode = CmpInstr.getOpcode();
2434 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2435 if (CmpValue && !IsSubsRegImm)
2436 return false;
2437 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2438 return false;
2439
2440 // MI conditions allowed: eq, ne, mi, pl
2441 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2442 if (MIUsedNZCV.C || MIUsedNZCV.V)
2443 return false;
2444
2445 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2446 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2447 // Condition flags are not used in CmpInstr basic block successors and only
2448 // Z or N flags allowed to be used after CmpInstr within its basic block
2449 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2450 return false;
2451 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2452 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2453 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2454 return false;
2455 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2456 if (MIUsedNZCV.N && !CmpValue)
2457 return false;
2458
2459 // There must be no defs of flags between MI and CmpInstr
2460 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2461 return false;
2462
2463 // Condition code is inverted in the following cases:
2464 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2465 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2466 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2467 (!CmpValue && MICC == AArch64CC::NE);
2468 return true;
2469 }
2470
2471 /// Remove comparison in csinc-cmp sequence
2472 ///
2473 /// Examples:
2474 /// 1. \code
2475 /// csinc w9, wzr, wzr, ne
2476 /// cmp w9, #0
2477 /// b.eq
2478 /// \endcode
2479 /// to
2480 /// \code
2481 /// csinc w9, wzr, wzr, ne
2482 /// b.ne
2483 /// \endcode
2484 ///
2485 /// 2. \code
2486 /// csinc x2, xzr, xzr, mi
2487 /// cmp x2, #1
2488 /// b.pl
2489 /// \endcode
2490 /// to
2491 /// \code
2492 /// csinc x2, xzr, xzr, mi
2493 /// b.pl
2494 /// \endcode
2495 ///
2496 /// \param CmpInstr comparison instruction
/// \param SrcReg register compared by \p CmpInstr; its unique definition is
/// the csinc candidate
/// \param CmpValue immediate the register is compared against (0 or 1)
/// \param MRI register info used to find the definition of \p SrcReg
2497 /// \return True when comparison removed
2498 bool AArch64InstrInfo::removeCmpToZeroOrOne(
2499 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2500 const MachineRegisterInfo &MRI) const {
2501 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2502 if (!MI)
2503 return false;
2504 const TargetRegisterInfo &TRI = getRegisterInfo();
// Filled by canCmpInstrBeRemoved with the flag readers that follow CmpInstr.
2505 SmallVector<MachineInstr *, 4> CCUseInstrs;
2506 bool IsInvertCC = false;
2507 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2508 IsInvertCC))
2509 return false;
2510 // Make transformation
2511 CmpInstr.eraseFromParent();
2512 if (IsInvertCC) {
2513 // Invert condition codes in CmpInstr CC users
2514 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2515 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2516 assert(Idx >= 0 && "Unexpected instruction using CC.");
2517 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
// NOTE(review): listing line 2518, which starts the statement defining CCUse
// from the current condition code, is missing from this copy.
2519 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2520 CCOperand.setImm(CCUse);
2521 }
2522 }
2523 return true;
2524 }
2525
/// Expand the pseudo instructions handled after register allocation:
/// STACK_GUARD_UNMIX, CATCHRET and LOAD_STACK_GUARD.
///
/// \returns false, leaving \p MI untouched, for any other opcode; true once
/// the replacement instructions have been emitted.
// NOTE(review): a number of listing lines inside this function (for example
// 2554, 2556, 2576, 2585, 2592, 2655, 2681-2682 and 2714) are missing from
// this copy, so several BuildMI chains below appear truncated.
2526 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2527 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2528 MI.getOpcode() != AArch64::CATCHRET &&
2529 MI.getOpcode() != AArch64::STACK_GUARD_UNMIX)
2530 return false;
2531
2532 MachineBasicBlock &MBB = *MI.getParent();
2533 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2534 auto TRI = Subtarget.getRegisterInfo();
2535 DebugLoc DL = MI.getDebugLoc();
2536
2537 if (MI.getOpcode() == AArch64::STACK_GUARD_UNMIX) {
2538 // Expand STACK_GUARD_UNMIX to: sub Rd, fp, Rs
2539 // This computes FP - stored_mixed_value to unmix the cookie
2540 Register DstReg = MI.getOperand(0).getReg();
2541 Register SrcReg = MI.getOperand(1).getReg();
2542
2543 BuildMI(MBB, MI, DL, get(AArch64::SUBXrr), DstReg)
2544 .addReg(AArch64::FP)
2545 .addReg(SrcReg);
2546
2547 MBB.erase(MI);
2548 return true;
2549 }
2550
2551 if (MI.getOpcode() == AArch64::CATCHRET) {
2552 // Skip to the first instruction before the epilog.
2553 const TargetInstrInfo *TII =
2555 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
// Walk backwards over the FrameDestroy-flagged instructions to find where the
// epilogue begins; the address materialisation is inserted at that point.
2557 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2558 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2559 FirstEpilogSEH != MBB.begin())
2560 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2561 if (FirstEpilogSEH != MBB.begin())
2562 FirstEpilogSEH = std::next(FirstEpilogSEH);
// Materialise the address of the catchret target block in X0 (adrp + add).
2563 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2564 .addReg(AArch64::X0, RegState::Define)
2565 .addMBB(TargetMBB);
2566 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2567 .addReg(AArch64::X0, RegState::Define)
2568 .addReg(AArch64::X0)
2569 .addMBB(TargetMBB)
2570 .addImm(0);
2571 TargetMBB->setMachineBlockAddressTaken();
2572 return true;
2573 }
2574
// Everything below handles LOAD_STACK_GUARD.
2575 Register Reg = MI.getOperand(0).getReg();
2577 if (M.getStackProtectorGuard() == "sysreg") {
2578 const AArch64SysReg::SysReg *SrcReg =
2579 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2580 if (!SrcReg)
2581 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2582
2583 // mrs xN, sysreg
2584 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2586 .addImm(SrcReg->Encoding);
2587 int Offset = M.getStackProtectorGuardOffset();
// Pick the load form by offset: non-negative multiples of 8 up to 32760 use
// LDRXui with the offset divided by 8 as the immediate.
2588 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2589 // ldr xN, [xN, #offset]
2590 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2591 .addDef(Reg)
2593 .addImm(Offset / 8);
2594 } else if (Offset >= -256 && Offset <= 255) {
2595 // ldur xN, [xN, #offset]
2596 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2597 .addDef(Reg)
2599 .addImm(Offset);
2600 } else if (Offset >= -4095 && Offset <= 4095) {
2601 if (Offset > 0) {
2602 // add xN, xN, #offset
2603 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2604 .addDef(Reg)
2606 .addImm(Offset)
2607 .addImm(0);
2608 } else {
2609 // sub xN, xN, #-offset (Offset is negative here)
2610 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2611 .addDef(Reg)
2613 .addImm(-Offset)
2614 .addImm(0);
2615 }
2616 // ldr xN, [xN]
2617 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2618 .addDef(Reg)
2620 .addImm(0);
2621 } else {
2622 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2623 // than 32760.
2624 // It might be nice to use AArch64::MOVi32imm here, which would get
2625 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2626 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2627 // AArch64FrameLowering might help us find such a scratch register
2628 // though. If we failed to find a scratch register, we could emit a
2629 // stream of add instructions to build up the immediate. Or, we could try
2630 // to insert a AArch64::MOVi32imm before register allocation so that we
2631 // didn't need to scavenge for a scratch register.
2632 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2633 }
2634 MBB.erase(MI);
2635 return true;
2636 }
2637
// Otherwise the guard is a global: its GlobalValue is taken from the first
// memory operand of MI and loaded according to how the reference is
// classified (GOT, large code model, or ADRP + page offset).
2638 const GlobalValue *GV =
2639 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2640 const TargetMachine &TM = MBB.getParent()->getTarget();
2641 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2642 const unsigned char MO_NC = AArch64II::MO_NC;
2643
// The guard width defaults to 4 bytes on ILP32 targets and 8 bytes otherwise.
2644 unsigned GuardWidth = M.getStackProtectorGuardValueWidth().value_or(
2645 Subtarget.isTargetILP32() ? 4 : 8);
2646 if (GuardWidth != 4 && GuardWidth != 8)
2647 report_fatal_error("Unsupported stack protector value width");
2648 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2649 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2650 .addGlobalAddress(GV, 0, OpFlags);
2651 if (GuardWidth == 4) {
2652 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2653 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2654 .addDef(Reg32, RegState::Dead)
2656 .addImm(0)
2657 .addMemOperand(*MI.memoperands_begin())
2659 } else {
2660 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2662 .addImm(0)
2663 .addMemOperand(*MI.memoperands_begin());
2664 }
2665 } else if (TM.getCodeModel() == CodeModel::Large) {
2666 if (GuardWidth == 4)
2667 report_fatal_error("Large code model with 4-byte stack protector not yet "
2668 "supported");
// Build the 64-bit address 16 bits at a time (MOVZ, then MOVK at shifts 16, 32
// and 48) and load through it.
2669 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2670 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2671 .addImm(0);
2672 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2674 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2675 .addImm(16);
2676 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2678 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2679 .addImm(32);
2680 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2683 .addImm(48);
2684 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2686 .addImm(0)
2687 .addMemOperand(*MI.memoperands_begin());
2688 } else {
2689 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2690 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2691 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2692 if (GuardWidth == 4) {
2693 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2694 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2695 .addDef(Reg32, RegState::Dead)
2697 .addGlobalAddress(GV, 0, LoFlags)
2698 .addMemOperand(*MI.memoperands_begin())
2700 } else {
2701 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2703 .addGlobalAddress(GV, 0, LoFlags)
2704 .addMemOperand(*MI.memoperands_begin());
2705 }
2706 }
2707 // To match MSVC. Unlike x86_64 which uses xor instruction to mix the cookie,
2708 // we use sub instruction to mix the cookie on aarch64.
2709 // The mixing happens here in expandPostRAPseudo (after RA) to ensure we use
2710 // the final frame pointer value.
2711 if (Subtarget.getTargetTriple().isOSMSVCRT())
2712 BuildMI(MBB, MI, DL, get(AArch64::SUBXrr), Reg)
2713 .addReg(AArch64::FP)
2715
2716 MBB.erase(MI);
2717
2718 return true;
2719 }
2720
2721 // Return true if this instruction simply sets its single destination register
2722 // to zero. This is equivalent to a register rename of the zero-register.
//
// Recognised forms: MOVZ with a zero immediate, AND-immediate whose source is
// the zero register, and a COPY from WZR.
// NOTE(review): the signature line (listing line 2723) is missing from this
// copy.
2724 switch (MI.getOpcode()) {
2725 default:
2726 break;
2727 case AArch64::MOVZWi:
2728 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2729 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2730 assert(MI.getDesc().getNumOperands() == 3 &&
2731 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2732 return true;
2733 }
2734 break;
2735 case AArch64::ANDWri: // and Rd, Rzr, #imm
2736 return MI.getOperand(1).getReg() == AArch64::WZR;
2737 case AArch64::ANDXri:
2738 return MI.getOperand(1).getReg() == AArch64::XZR;
2739 case TargetOpcode::COPY:
2740 return MI.getOperand(1).getReg() == AArch64::WZR;
2741 }
2742 return false;
2743 }
2744
2745 // Return true if this instruction simply renames a general register without
2746 // modifying bits.
//
// Recognised forms: a COPY into a GPR32/GPR64 register, ORRXrs with XZR as the
// first source, and ADDXri with a zero immediate.
// NOTE(review): the signature line (listing line 2747) is missing from this
// copy.
2748 switch (MI.getOpcode()) {
2749 default:
2750 break;
2751 case TargetOpcode::COPY: {
2752 // GPR32 copies will be lowered to ORRXrs
2753 Register DstReg = MI.getOperand(0).getReg();
2754 return (AArch64::GPR32RegClass.contains(DstReg) ||
2755 AArch64::GPR64RegClass.contains(DstReg));
2756 }
2757 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2758 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2759 assert(MI.getDesc().getNumOperands() == 4 &&
2760 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2761 return true;
2762 }
2763 break;
2764 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2765 if (MI.getOperand(2).getImm() == 0) {
2766 assert(MI.getDesc().getNumOperands() == 4 &&
2767 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2768 return true;
2769 }
2770 break;
2771 }
2772 return false;
2773 }
2774
2775 // Return true if this instruction simply renames an FPR128 register without
2776 // modifying bits.
//
// Recognised forms: a COPY whose destination is in FPR128, and ORRv16i8 with
// both source operands naming the same register.
// NOTE(review): the signature line (listing line 2777) is missing from this
// copy.
2778 switch (MI.getOpcode()) {
2779 default:
2780 break;
2781 case TargetOpcode::COPY: {
2782 Register DstReg = MI.getOperand(0).getReg();
2783 return AArch64::FPR128RegClass.contains(DstReg);
2784 }
2785 case AArch64::ORRv16i8:
2786 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2787 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2788 "invalid ORRv16i8 operands");
2789 return true;
2790 }
2791 break;
2792 }
2793 return false;
2794 }
2795
2796static bool isFrameLoadOpcode(int Opcode) {
2797 switch (Opcode) {
2798 default:
2799 return false;
2800 case AArch64::LDRWui:
2801 case AArch64::LDRXui:
2802 case AArch64::LDRBui:
2803 case AArch64::LDRHui:
2804 case AArch64::LDRSui:
2805 case AArch64::LDRDui:
2806 case AArch64::LDRQui:
2807 case AArch64::LDR_PXI:
2808 return true;
2809 }
2810}
2811
/// If MI is a plain load from a stack slot, store the slot in \p FrameIndex
/// and return the loaded register; otherwise return an invalid Register.
///
/// "Plain" means: a frame-load opcode (see isFrameLoadOpcode), no sub-register
/// on the destination, a frame-index address operand and an immediate offset
/// of zero.
// NOTE(review): the first line of the signature (listing line 2812) is missing
// from this copy.
2813 int &FrameIndex) const {
2814 if (!isFrameLoadOpcode(MI.getOpcode()))
2815 return Register();
2816
2817 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2818 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2819 FrameIndex = MI.getOperand(1).getIndex();
2820 return MI.getOperand(0).getReg();
2821 }
2822 return Register();
2823 }
2824
2825static bool isFrameStoreOpcode(int Opcode) {
2826 switch (Opcode) {
2827 default:
2828 return false;
2829 case AArch64::STRWui:
2830 case AArch64::STRXui:
2831 case AArch64::STRBui:
2832 case AArch64::STRHui:
2833 case AArch64::STRSui:
2834 case AArch64::STRDui:
2835 case AArch64::STRQui:
2836 case AArch64::STR_PXI:
2837 return true;
2838 }
2839}
2840
/// If MI is a plain store to a stack slot, store the slot in \p FrameIndex and
/// return the stored register; otherwise return an invalid Register.
///
/// "Plain" means: a frame-store opcode (see isFrameStoreOpcode), no
/// sub-register on the stored operand, a frame-index address operand and an
/// immediate offset of zero.
// NOTE(review): the first line of the signature (listing line 2841) is missing
// from this copy.
2842 int &FrameIndex) const {
2843 if (!isFrameStoreOpcode(MI.getOpcode()))
2844 return Register();
2845
2846 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2847 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2848 FrameIndex = MI.getOperand(1).getIndex();
2849 return MI.getOperand(0).getReg();
2850 }
2851 return Register();
2852 }
2853
/// Variant of isStoreToStackSlot that also recognises stores whose frame index
/// can only be recovered from the memory operands.
///
/// First defers to isStoreToStackSlot; failing that, accepts MI only when
/// hasStoreToStackSlot reports exactly one stack access, and takes the frame
/// index from that access's FixedStackPseudoSourceValue.
// NOTE(review): listing lines 2854 (first line of the signature) and 2862 (the
// declaration of Accesses) are missing from this copy.
2855 int &FrameIndex) const {
2856 if (!isFrameStoreOpcode(MI.getOpcode()))
2857 return Register();
2858
2859 if (Register Reg = isStoreToStackSlot(MI, FrameIndex))
2860 return Reg;
2861
2863 if (hasStoreToStackSlot(MI, Accesses)) {
// More than one stack access cannot be described by a single FrameIndex.
2864 if (Accesses.size() > 1)
2865 return Register();
2866
2867 FrameIndex =
2868 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2869 ->getFrameIndex();
2870 return MI.getOperand(0).getReg();
2871 }
2872 return Register();
2873 }
2874
/// Variant of isLoadFromStackSlot that also recognises loads whose frame index
/// can only be recovered from the memory operands.
///
/// First defers to isLoadFromStackSlot; failing that, accepts MI only when
/// hasLoadFromStackSlot reports exactly one stack access, and takes the frame
/// index from that access's FixedStackPseudoSourceValue.
// NOTE(review): listing lines 2875 (first line of the signature) and 2883 (the
// declaration of Accesses) are missing from this copy.
2876 int &FrameIndex) const {
2877 if (!isFrameLoadOpcode(MI.getOpcode()))
2878 return Register();
2879
2880 if (Register Reg = isLoadFromStackSlot(MI, FrameIndex))
2881 return Reg;
2882
2884 if (hasLoadFromStackSlot(MI, Accesses)) {
// More than one stack access cannot be described by a single FrameIndex.
2885 if (Accesses.size() > 1)
2886 return Register();
2887
2888 FrameIndex =
2889 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2890 ->getFrameIndex();
2891 return MI.getOperand(0).getReg();
2892 }
2893 return Register();
2894 }
2895
2896 /// Check all MachineMemOperands for a hint to suppress pairing.
/// \returns true if at least one memory operand of MI has the MOSuppressPair
/// flag set.
// NOTE(review): the signature line (listing line 2897) is missing from this
// copy.
2898 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2899 return MMO->getFlags() & MOSuppressPair;
2900 });
2901 }
2902
2903 /// Set a flag on the first MachineMemOperand to suppress pairing.
/// Does nothing when MI has no memory operands, since there is nowhere to
/// record the MOSuppressPair hint.
// NOTE(review): the signature line (listing line 2904) is missing from this
// copy.
2905 if (MI.memoperands_empty())
2906 return;
2907 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2908 }
2909
2910 /// Check all MachineMemOperands for a hint that the load/store is strided.
/// \returns true if at least one memory operand of MI has the MOStridedAccess
/// flag set.
// NOTE(review): the signature line (listing line 2911) is missing from this
// copy.
2912 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2913 return MMO->getFlags() & MOStridedAccess;
2914 });
2915 }
2916
/// \returns true if Opc is one of the load/store opcodes listed below (the
/// STUR*/LDUR* forms and the pre-indexed STR*pre/LDR*pre forms), false for
/// any other opcode.
// NOTE(review): the signature line (listing line 2917) is missing from this
// copy; presumably this is the unscaled-offset query that accompanies
// getUnscaledLdSt - confirm against upstream.
2918 switch (Opc) {
2919 default:
2920 return false;
2921 case AArch64::STURSi:
2922 case AArch64::STRSpre:
2923 case AArch64::STURDi:
2924 case AArch64::STRDpre:
2925 case AArch64::STURQi:
2926 case AArch64::STRQpre:
2927 case AArch64::STURBBi:
2928 case AArch64::STURHHi:
2929 case AArch64::STURWi:
2930 case AArch64::STRWpre:
2931 case AArch64::STURXi:
2932 case AArch64::STRXpre:
2933 case AArch64::LDURSi:
2934 case AArch64::LDRSpre:
2935 case AArch64::LDURDi:
2936 case AArch64::LDRDpre:
2937 case AArch64::LDURQi:
2938 case AArch64::LDRQpre:
2939 case AArch64::LDURWi:
2940 case AArch64::LDRWpre:
2941 case AArch64::LDURXi:
2942 case AArch64::LDRXpre:
2943 case AArch64::LDRSWpre:
2944 case AArch64::LDURSWi:
2945 case AArch64::LDURHHi:
2946 case AArch64::LDURBBi:
2947 case AArch64::LDURSBWi:
2948 case AArch64::LDURSHWi:
2949 return true;
2950 }
2951 }
2952
2953std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2954 switch (Opc) {
2955 default: return {};
2956 case AArch64::PRFMui: return AArch64::PRFUMi;
2957 case AArch64::LDRXui: return AArch64::LDURXi;
2958 case AArch64::LDRWui: return AArch64::LDURWi;
2959 case AArch64::LDRBui: return AArch64::LDURBi;
2960 case AArch64::LDRHui: return AArch64::LDURHi;
2961 case AArch64::LDRSui: return AArch64::LDURSi;
2962 case AArch64::LDRDui: return AArch64::LDURDi;
2963 case AArch64::LDRQui: return AArch64::LDURQi;
2964 case AArch64::LDRBBui: return AArch64::LDURBBi;
2965 case AArch64::LDRHHui: return AArch64::LDURHHi;
2966 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2967 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2968 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2969 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2970 case AArch64::LDRSWui: return AArch64::LDURSWi;
2971 case AArch64::STRXui: return AArch64::STURXi;
2972 case AArch64::STRWui: return AArch64::STURWi;
2973 case AArch64::STRBui: return AArch64::STURBi;
2974 case AArch64::STRHui: return AArch64::STURHi;
2975 case AArch64::STRSui: return AArch64::STURSi;
2976 case AArch64::STRDui: return AArch64::STURDi;
2977 case AArch64::STRQui: return AArch64::STURQi;
2978 case AArch64::STRBBui: return AArch64::STURBBi;
2979 case AArch64::STRHHui: return AArch64::STURHHi;
2980 }
2981}
2982
// Body of an opcode lookup; the llvm_unreachable message below names it
// getLoadStoreImmIdx. The signature line is not present in this excerpt.
// Visible behavior: returns 2, 3 or 4 depending on which group Opc belongs
// to, and hits llvm_unreachable for any opcode that is not listed.
// NOTE(review): presumably the returned number is the operand index of the
// immediate, as the name suggests — confirm against the declaration.
2984 switch (Opc) {
2985 default:
2986 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
// Opcodes for which 2 is returned.
2987 case AArch64::ADDG:
2988 case AArch64::LDAPURBi:
2989 case AArch64::LDAPURHi:
2990 case AArch64::LDAPURi:
2991 case AArch64::LDAPURSBWi:
2992 case AArch64::LDAPURSBXi:
2993 case AArch64::LDAPURSHWi:
2994 case AArch64::LDAPURSHXi:
2995 case AArch64::LDAPURSWi:
2996 case AArch64::LDAPURXi:
2997 case AArch64::LDR_PPXI:
2998 case AArch64::LDR_PXI:
2999 case AArch64::LDR_ZXI:
3000 case AArch64::LDR_ZZXI:
3001 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
3002 case AArch64::LDR_ZZZXI:
3003 case AArch64::LDR_ZZZZXI:
3004 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
3005 case AArch64::LDRBBui:
3006 case AArch64::LDRBui:
3007 case AArch64::LDRDui:
3008 case AArch64::LDRHHui:
3009 case AArch64::LDRHui:
3010 case AArch64::LDRQui:
3011 case AArch64::LDRSBWui:
3012 case AArch64::LDRSBXui:
3013 case AArch64::LDRSHWui:
3014 case AArch64::LDRSHXui:
3015 case AArch64::LDRSui:
3016 case AArch64::LDRSWui:
3017 case AArch64::LDRWui:
3018 case AArch64::LDRXui:
3019 case AArch64::LDURBBi:
3020 case AArch64::LDURBi:
3021 case AArch64::LDURDi:
3022 case AArch64::LDURHHi:
3023 case AArch64::LDURHi:
3024 case AArch64::LDURQi:
3025 case AArch64::LDURSBWi:
3026 case AArch64::LDURSBXi:
3027 case AArch64::LDURSHWi:
3028 case AArch64::LDURSHXi:
3029 case AArch64::LDURSi:
3030 case AArch64::LDURSWi:
3031 case AArch64::LDURWi:
3032 case AArch64::LDURXi:
3033 case AArch64::PRFMui:
3034 case AArch64::PRFUMi:
3035 case AArch64::ST2Gi:
3036 case AArch64::STGi:
3037 case AArch64::STLURBi:
3038 case AArch64::STLURHi:
3039 case AArch64::STLURWi:
3040 case AArch64::STLURXi:
3041 case AArch64::StoreSwiftAsyncContext:
3042 case AArch64::STR_PPXI:
3043 case AArch64::STR_PXI:
3044 case AArch64::STR_ZXI:
3045 case AArch64::STR_ZZXI:
3046 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
3047 case AArch64::STR_ZZZXI:
3048 case AArch64::STR_ZZZZXI:
3049 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
3050 case AArch64::STRBBui:
3051 case AArch64::STRBui:
3052 case AArch64::STRDui:
3053 case AArch64::STRHHui:
3054 case AArch64::STRHui:
3055 case AArch64::STRQui:
3056 case AArch64::STRSui:
3057 case AArch64::STRWui:
3058 case AArch64::STRXui:
3059 case AArch64::STURBBi:
3060 case AArch64::STURBi:
3061 case AArch64::STURDi:
3062 case AArch64::STURHHi:
3063 case AArch64::STURHi:
3064 case AArch64::STURQi:
3065 case AArch64::STURSi:
3066 case AArch64::STURWi:
3067 case AArch64::STURXi:
3068 case AArch64::STZ2Gi:
3069 case AArch64::STZGi:
3070 case AArch64::TAGPstack:
3071 return 2;
// Opcodes for which 3 is returned.
3072 case AArch64::LD1B_D_IMM:
3073 case AArch64::LD1B_H_IMM:
3074 case AArch64::LD1B_IMM:
3075 case AArch64::LD1B_S_IMM:
3076 case AArch64::LD1D_IMM:
3077 case AArch64::LD1H_D_IMM:
3078 case AArch64::LD1H_IMM:
3079 case AArch64::LD1H_S_IMM:
3080 case AArch64::LD1RB_D_IMM:
3081 case AArch64::LD1RB_H_IMM:
3082 case AArch64::LD1RB_IMM:
3083 case AArch64::LD1RB_S_IMM:
3084 case AArch64::LD1RD_IMM:
3085 case AArch64::LD1RH_D_IMM:
3086 case AArch64::LD1RH_IMM:
3087 case AArch64::LD1RH_S_IMM:
3088 case AArch64::LD1RSB_D_IMM:
3089 case AArch64::LD1RSB_H_IMM:
3090 case AArch64::LD1RSB_S_IMM:
3091 case AArch64::LD1RSH_D_IMM:
3092 case AArch64::LD1RSH_S_IMM:
3093 case AArch64::LD1RSW_IMM:
3094 case AArch64::LD1RW_D_IMM:
3095 case AArch64::LD1RW_IMM:
3096 case AArch64::LD1SB_D_IMM:
3097 case AArch64::LD1SB_H_IMM:
3098 case AArch64::LD1SB_S_IMM:
3099 case AArch64::LD1SH_D_IMM:
3100 case AArch64::LD1SH_S_IMM:
3101 case AArch64::LD1SW_D_IMM:
3102 case AArch64::LD1W_D_IMM:
3103 case AArch64::LD1W_IMM:
3104 case AArch64::LD2B_IMM:
3105 case AArch64::LD2D_IMM:
3106 case AArch64::LD2H_IMM:
3107 case AArch64::LD2W_IMM:
3108 case AArch64::LD3B_IMM:
3109 case AArch64::LD3D_IMM:
3110 case AArch64::LD3H_IMM:
3111 case AArch64::LD3W_IMM:
3112 case AArch64::LD4B_IMM:
3113 case AArch64::LD4D_IMM:
3114 case AArch64::LD4H_IMM:
3115 case AArch64::LD4W_IMM:
3116 case AArch64::LDG:
3117 case AArch64::LDNF1B_D_IMM:
3118 case AArch64::LDNF1B_H_IMM:
3119 case AArch64::LDNF1B_IMM:
3120 case AArch64::LDNF1B_S_IMM:
3121 case AArch64::LDNF1D_IMM:
3122 case AArch64::LDNF1H_D_IMM:
3123 case AArch64::LDNF1H_IMM:
3124 case AArch64::LDNF1H_S_IMM:
3125 case AArch64::LDNF1SB_D_IMM:
3126 case AArch64::LDNF1SB_H_IMM:
3127 case AArch64::LDNF1SB_S_IMM:
3128 case AArch64::LDNF1SH_D_IMM:
3129 case AArch64::LDNF1SH_S_IMM:
3130 case AArch64::LDNF1SW_D_IMM:
3131 case AArch64::LDNF1W_D_IMM:
3132 case AArch64::LDNF1W_IMM:
3133 case AArch64::LDNPDi:
3134 case AArch64::LDNPQi:
3135 case AArch64::LDNPSi:
3136 case AArch64::LDNPWi:
3137 case AArch64::LDNPXi:
3138 case AArch64::LDNT1B_ZRI:
3139 case AArch64::LDNT1D_ZRI:
3140 case AArch64::LDNT1H_ZRI:
3141 case AArch64::LDNT1W_ZRI:
3142 case AArch64::LDPDi:
3143 case AArch64::LDPQi:
3144 case AArch64::LDPSi:
3145 case AArch64::LDPWi:
3146 case AArch64::LDPXi:
3147 case AArch64::LDRBBpost:
3148 case AArch64::LDRBBpre:
3149 case AArch64::LDRBpost:
3150 case AArch64::LDRBpre:
3151 case AArch64::LDRDpost:
3152 case AArch64::LDRDpre:
3153 case AArch64::LDRHHpost:
3154 case AArch64::LDRHHpre:
3155 case AArch64::LDRHpost:
3156 case AArch64::LDRHpre:
3157 case AArch64::LDRQpost:
3158 case AArch64::LDRQpre:
3159 case AArch64::LDRSpost:
3160 case AArch64::LDRSpre:
3161 case AArch64::LDRWpost:
3162 case AArch64::LDRWpre:
3163 case AArch64::LDRXpost:
3164 case AArch64::LDRXpre:
3165 case AArch64::ST1B_D_IMM:
3166 case AArch64::ST1B_H_IMM:
3167 case AArch64::ST1B_IMM:
3168 case AArch64::ST1B_S_IMM:
3169 case AArch64::ST1D_IMM:
3170 case AArch64::ST1H_D_IMM:
3171 case AArch64::ST1H_IMM:
3172 case AArch64::ST1H_S_IMM:
3173 case AArch64::ST1W_D_IMM:
3174 case AArch64::ST1W_IMM:
3175 case AArch64::ST2B_IMM:
3176 case AArch64::ST2D_IMM:
3177 case AArch64::ST2H_IMM:
3178 case AArch64::ST2W_IMM:
3179 case AArch64::ST3B_IMM:
3180 case AArch64::ST3D_IMM:
3181 case AArch64::ST3H_IMM:
3182 case AArch64::ST3W_IMM:
3183 case AArch64::ST4B_IMM:
3184 case AArch64::ST4D_IMM:
3185 case AArch64::ST4H_IMM:
3186 case AArch64::ST4W_IMM:
3187 case AArch64::STGPi:
3188 case AArch64::STGPreIndex:
3189 case AArch64::STZGPreIndex:
3190 case AArch64::ST2GPreIndex:
3191 case AArch64::STZ2GPreIndex:
3192 case AArch64::STGPostIndex:
3193 case AArch64::STZGPostIndex:
3194 case AArch64::ST2GPostIndex:
3195 case AArch64::STZ2GPostIndex:
3196 case AArch64::STNPDi:
3197 case AArch64::STNPQi:
3198 case AArch64::STNPSi:
3199 case AArch64::STNPWi:
3200 case AArch64::STNPXi:
3201 case AArch64::STNT1B_ZRI:
3202 case AArch64::STNT1D_ZRI:
3203 case AArch64::STNT1H_ZRI:
3204 case AArch64::STNT1W_ZRI:
3205 case AArch64::STPDi:
3206 case AArch64::STPQi:
3207 case AArch64::STPSi:
3208 case AArch64::STPWi:
3209 case AArch64::STPXi:
3210 case AArch64::STRBBpost:
3211 case AArch64::STRBBpre:
3212 case AArch64::STRBpost:
3213 case AArch64::STRBpre:
3214 case AArch64::STRDpost:
3215 case AArch64::STRDpre:
3216 case AArch64::STRHHpost:
3217 case AArch64::STRHHpre:
3218 case AArch64::STRHpost:
3219 case AArch64::STRHpre:
3220 case AArch64::STRQpost:
3221 case AArch64::STRQpre:
3222 case AArch64::STRSpost:
3223 case AArch64::STRSpre:
3224 case AArch64::STRWpost:
3225 case AArch64::STRWpre:
3226 case AArch64::STRXpost:
3227 case AArch64::STRXpre:
3228 case AArch64::LD1B_2Z_IMM:
3229 case AArch64::LD1B_2Z_STRIDED_IMM:
3230 case AArch64::LD1H_2Z_IMM:
3231 case AArch64::LD1H_2Z_STRIDED_IMM:
3232 case AArch64::LD1W_2Z_IMM:
3233 case AArch64::LD1W_2Z_STRIDED_IMM:
3234 case AArch64::LD1D_2Z_IMM:
3235 case AArch64::LD1D_2Z_STRIDED_IMM:
3236 case AArch64::LD1B_4Z_IMM:
3237 case AArch64::LD1B_4Z_STRIDED_IMM:
3238 case AArch64::LD1H_4Z_IMM:
3239 case AArch64::LD1H_4Z_STRIDED_IMM:
3240 case AArch64::LD1W_4Z_IMM:
3241 case AArch64::LD1W_4Z_STRIDED_IMM:
3242 case AArch64::LD1D_4Z_IMM:
3243 case AArch64::LD1D_4Z_STRIDED_IMM:
3244 case AArch64::LD1B_2Z_IMM_PSEUDO:
3245 case AArch64::LD1H_2Z_IMM_PSEUDO:
3246 case AArch64::LD1W_2Z_IMM_PSEUDO:
3247 case AArch64::LD1D_2Z_IMM_PSEUDO:
3248 case AArch64::LD1B_4Z_IMM_PSEUDO:
3249 case AArch64::LD1H_4Z_IMM_PSEUDO:
3250 case AArch64::LD1W_4Z_IMM_PSEUDO:
3251 case AArch64::LD1D_4Z_IMM_PSEUDO:
3252 case AArch64::ST1B_2Z_IMM:
3253 case AArch64::ST1B_2Z_STRIDED_IMM:
3254 case AArch64::ST1H_2Z_IMM:
3255 case AArch64::ST1H_2Z_STRIDED_IMM:
3256 case AArch64::ST1W_2Z_IMM:
3257 case AArch64::ST1W_2Z_STRIDED_IMM:
3258 case AArch64::ST1D_2Z_IMM:
3259 case AArch64::ST1D_2Z_STRIDED_IMM:
3260 case AArch64::LDNT1B_2Z_IMM_PSEUDO:
3261 case AArch64::LDNT1B_2Z_IMM:
3262 case AArch64::LDNT1B_2Z_STRIDED_IMM:
3263 case AArch64::LDNT1H_2Z_IMM_PSEUDO:
3264 case AArch64::LDNT1H_2Z_IMM:
3265 case AArch64::LDNT1H_2Z_STRIDED_IMM:
3266 case AArch64::LDNT1W_2Z_IMM_PSEUDO:
3267 case AArch64::LDNT1W_2Z_IMM:
3268 case AArch64::LDNT1W_2Z_STRIDED_IMM:
3269 case AArch64::LDNT1D_2Z_IMM_PSEUDO:
3270 case AArch64::LDNT1D_2Z_IMM:
3271 case AArch64::LDNT1D_2Z_STRIDED_IMM:
3272 case AArch64::STNT1B_2Z_IMM:
3273 case AArch64::STNT1B_2Z_STRIDED_IMM:
3274 case AArch64::STNT1H_2Z_IMM:
3275 case AArch64::STNT1H_2Z_STRIDED_IMM:
3276 case AArch64::STNT1W_2Z_IMM:
3277 case AArch64::STNT1W_2Z_STRIDED_IMM:
3278 case AArch64::STNT1D_2Z_IMM:
3279 case AArch64::STNT1D_2Z_STRIDED_IMM:
3280 case AArch64::ST1B_4Z_IMM:
3281 case AArch64::ST1B_4Z_STRIDED_IMM:
3282 case AArch64::ST1H_4Z_IMM:
3283 case AArch64::ST1H_4Z_STRIDED_IMM:
3284 case AArch64::ST1W_4Z_IMM:
3285 case AArch64::ST1W_4Z_STRIDED_IMM:
3286 case AArch64::ST1D_4Z_IMM:
3287 case AArch64::ST1D_4Z_STRIDED_IMM:
3288 case AArch64::LDNT1B_4Z_IMM_PSEUDO:
3289 case AArch64::LDNT1B_4Z_IMM:
3290 case AArch64::LDNT1B_4Z_STRIDED_IMM:
3291 case AArch64::LDNT1H_4Z_IMM_PSEUDO:
3292 case AArch64::LDNT1H_4Z_IMM:
3293 case AArch64::LDNT1H_4Z_STRIDED_IMM:
3294 case AArch64::LDNT1W_4Z_IMM_PSEUDO:
3295 case AArch64::LDNT1W_4Z_IMM:
3296 case AArch64::LDNT1W_4Z_STRIDED_IMM:
3297 case AArch64::LDNT1D_4Z_IMM_PSEUDO:
3298 case AArch64::LDNT1D_4Z_IMM:
3299 case AArch64::LDNT1D_4Z_STRIDED_IMM:
3300 case AArch64::STNT1B_4Z_IMM:
3301 case AArch64::STNT1B_4Z_STRIDED_IMM:
3302 case AArch64::STNT1H_4Z_IMM:
3303 case AArch64::STNT1H_4Z_STRIDED_IMM:
3304 case AArch64::STNT1W_4Z_IMM:
3305 case AArch64::STNT1W_4Z_STRIDED_IMM:
3306 case AArch64::STNT1D_4Z_IMM:
3307 case AArch64::STNT1D_4Z_STRIDED_IMM:
3308 return 3;
// Opcodes for which 4 is returned.
3309 case AArch64::LDPDpost:
3310 case AArch64::LDPDpre:
3311 case AArch64::LDPQpost:
3312 case AArch64::LDPQpre:
3313 case AArch64::LDPSpost:
3314 case AArch64::LDPSpre:
3315 case AArch64::LDPWpost:
3316 case AArch64::LDPWpre:
3317 case AArch64::LDPXpost:
3318 case AArch64::LDPXpre:
3319 case AArch64::STGPpre:
3320 case AArch64::STGPpost:
3321 case AArch64::STPDpost:
3322 case AArch64::STPDpre:
3323 case AArch64::STPQpost:
3324 case AArch64::STPQpre:
3325 case AArch64::STPSpost:
3326 case AArch64::STPSpre:
3327 case AArch64::STPWpost:
3328 case AArch64::STPWpre:
3329 case AArch64::STPXpost:
3330 case AArch64::STPXpre:
3331 return 4;
3332 }
3333}
3334
// Body of a predicate over MI.getOpcode(); the signature line is not present
// in this excerpt. Returns true for the load/store opcodes listed below
// (grouped by the existing section comments) and false for everything else.
3336 switch (MI.getOpcode()) {
3337 default:
3338 return false;
3339 // Scaled instructions.
3340 case AArch64::STRSui:
3341 case AArch64::STRDui:
3342 case AArch64::STRQui:
3343 case AArch64::STRXui:
3344 case AArch64::STRWui:
3345 case AArch64::LDRSui:
3346 case AArch64::LDRDui:
3347 case AArch64::LDRQui:
3348 case AArch64::LDRXui:
3349 case AArch64::LDRWui:
3350 case AArch64::LDRSWui:
3351 // Unscaled instructions.
// (This group also lists the *pre opcodes next to each LDUR*/STUR* opcode.)
3352 case AArch64::STURSi:
3353 case AArch64::STRSpre:
3354 case AArch64::STURDi:
3355 case AArch64::STRDpre:
3356 case AArch64::STURQi:
3357 case AArch64::STRQpre:
3358 case AArch64::STURWi:
3359 case AArch64::STRWpre:
3360 case AArch64::STURXi:
3361 case AArch64::STRXpre:
3362 case AArch64::LDURSi:
3363 case AArch64::LDRSpre:
3364 case AArch64::LDURDi:
3365 case AArch64::LDRDpre:
3366 case AArch64::LDURQi:
3367 case AArch64::LDRQpre:
3368 case AArch64::LDURWi:
3369 case AArch64::LDRWpre:
3370 case AArch64::LDURXi:
3371 case AArch64::LDRXpre:
3372 case AArch64::LDURSWi:
3373 case AArch64::LDRSWpre:
3374 // SVE instructions.
3375 case AArch64::LDR_ZXI:
3376 case AArch64::STR_ZXI:
3377 return true;
3378 }
3379}
3380
// Body of a predicate over MI.getOpcode(); the signature line is not present
// in this excerpt. Returns true for the TCRETURN* / AUTH_TCRETURN* opcodes
// listed below and false otherwise.
3382 switch (MI.getOpcode()) {
3383 default:
// Sanity check: an instruction that is both a call and a return but is not
// in the list below trips this assert, flagging that the list is stale.
3384 assert((!MI.isCall() || !MI.isReturn()) &&
3385 "Unexpected instruction - was a new tail call opcode introduced?");
3386 return false;
3387 case AArch64::TCRETURNdi:
3388 case AArch64::TCRETURNri:
3389 case AArch64::TCRETURNrix16x17:
3390 case AArch64::TCRETURNrix17:
3391 case AArch64::TCRETURNrinotx16:
3392 case AArch64::TCRETURNriALL:
3393 case AArch64::AUTH_TCRETURN:
3394 case AArch64::AUTH_TCRETURN_BTI:
3395 return true;
3396 }
3397}
3398
// Body of an opcode-to-opcode map; the signature line is not present in this
// excerpt. Each listed opcode is translated to the opcode whose name carries
// an additional 'S' (e.g. ADDWri -> ADDSWri, AND_PPzPP -> ANDS_PPzPP), i.e.
// the flag-setting equivalent referred to by the unreachable message.
// Opcodes without an entry hit llvm_unreachable.
3400 switch (Opc) {
3401 default:
3402 llvm_unreachable("Opcode has no flag setting equivalent!");
3403 // 32-bit cases:
3404 case AArch64::ADDWri:
3405 return AArch64::ADDSWri;
3406 case AArch64::ADDWrr:
3407 return AArch64::ADDSWrr;
3408 case AArch64::ADDWrs:
3409 return AArch64::ADDSWrs;
3410 case AArch64::ADDWrx:
3411 return AArch64::ADDSWrx;
3412 case AArch64::ANDWri:
3413 return AArch64::ANDSWri;
3414 case AArch64::ANDWrr:
3415 return AArch64::ANDSWrr;
3416 case AArch64::ANDWrs:
3417 return AArch64::ANDSWrs;
3418 case AArch64::BICWrr:
3419 return AArch64::BICSWrr;
3420 case AArch64::BICWrs:
3421 return AArch64::BICSWrs;
3422 case AArch64::SUBWri:
3423 return AArch64::SUBSWri;
3424 case AArch64::SUBWrr:
3425 return AArch64::SUBSWrr;
3426 case AArch64::SUBWrs:
3427 return AArch64::SUBSWrs;
3428 case AArch64::SUBWrx:
3429 return AArch64::SUBSWrx;
3430 // 64-bit cases:
3431 case AArch64::ADDXri:
3432 return AArch64::ADDSXri;
3433 case AArch64::ADDXrr:
3434 return AArch64::ADDSXrr;
3435 case AArch64::ADDXrs:
3436 return AArch64::ADDSXrs;
3437 case AArch64::ADDXrx:
3438 return AArch64::ADDSXrx;
3439 case AArch64::ANDXri:
3440 return AArch64::ANDSXri;
3441 case AArch64::ANDXrr:
3442 return AArch64::ANDSXrr;
3443 case AArch64::ANDXrs:
3444 return AArch64::ANDSXrs;
3445 case AArch64::BICXrr:
3446 return AArch64::BICSXrr;
3447 case AArch64::BICXrs:
3448 return AArch64::BICSXrs;
3449 case AArch64::SUBXri:
3450 return AArch64::SUBSXri;
3451 case AArch64::SUBXrr:
3452 return AArch64::SUBSXrr;
3453 case AArch64::SUBXrs:
3454 return AArch64::SUBSXrs;
3455 case AArch64::SUBXrx:
3456 return AArch64::SUBSXrx;
3457 // SVE instructions:
3458 case AArch64::AND_PPzPP:
3459 return AArch64::ANDS_PPzPP;
3460 case AArch64::BIC_PPzPP:
3461 return AArch64::BICS_PPzPP;
3462 case AArch64::EOR_PPzPP:
3463 return AArch64::EORS_PPzPP;
3464 case AArch64::NAND_PPzPP:
3465 return AArch64::NANDS_PPzPP;
3466 case AArch64::NOR_PPzPP:
3467 return AArch64::NORS_PPzPP;
3468 case AArch64::ORN_PPzPP:
3469 return AArch64::ORNS_PPzPP;
3470 case AArch64::ORR_PPzPP:
3471 return AArch64::ORRS_PPzPP;
3472 case AArch64::BRKA_PPzP:
3473 return AArch64::BRKAS_PPzP;
3474 case AArch64::BRKPA_PPzPP:
3475 return AArch64::BRKPAS_PPzPP;
3476 case AArch64::BRKB_PPzP:
3477 return AArch64::BRKBS_PPzP;
3478 case AArch64::BRKPB_PPzPP:
3479 return AArch64::BRKPBS_PPzPP;
3480 case AArch64::BRKN_PPzP:
3481 return AArch64::BRKNS_PPzP;
3482 case AArch64::RDFFR_PPz:
3483 return AArch64::RDFFRS_PPz;
3484 case AArch64::PTRUE_B:
3485 return AArch64::PTRUES_B;
3486 }
3487}
3488
3489// Is this a candidate for ld/st merging or pairing? For example, we don't
3490// touch volatiles or load/stores that have a hint to avoid pair formation.
// NOTE(review): the signature line and a few interior lines are absent from
// this excerpt (visible as gaps in the embedded numbering), including the
// declaration of TRI and the conditions guarding two of the `return false`
// statements below. The added comments describe only what is visible.
// Visible result: false as soon as any check below rejects MI, true otherwise.
3492
3493 bool IsPreLdSt = isPreLdSt(MI);
3494
3495 // If this is a volatile load/store, don't mess with it.
3496 if (MI.hasOrderedMemoryRef())
3497 return false;
3498
3499 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
3500 // For Pre-inc LD/ST, the operand is shifted by one.
3501 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
3502 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
3503 "Expected a reg or frame index operand.");
3504
3505 // For Pre-indexed addressing quadword instructions, the third operand is the
3506 // immediate value.
3507 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
3508
// Reject when neither operand 2 nor (for pre-indexed forms) operand 3 is an
// immediate.
3509 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
3510 return false;
3511
3512 // Can't merge/pair if the instruction modifies the base register.
3513 // e.g., ldr x0, [x0]
3514 // This case will never occur with an FI base.
3515 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
3516 // STR<S,D,Q,W,X>pre, it can be merged.
3517 // For example:
3518 // ldr q0, [x11, #32]!
3519 // ldr q1, [x11, #16]
3520 // to
3521 // ldp q0, q1, [x11, #32]!
3522 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
3523 Register BaseReg = MI.getOperand(1).getReg();
3525 if (MI.modifiesRegister(BaseReg, TRI))
3526 return false;
3527 }
3528
3529 // Pairing SVE fills/spills is only valid for little-endian targets that
3530 // implement VLS 128.
3531 switch (MI.getOpcode()) {
3532 default:
3533 break;
3534 case AArch64::LDR_ZXI:
3535 case AArch64::STR_ZXI:
3536 if (!Subtarget.isLittleEndian() ||
3537 Subtarget.getSVEVectorSizeInBits() != 128)
3538 return false;
3539 }
3540
3541 // Check if this load/store has a hint to avoid pair formation.
3542 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
3544 return false;
3545
3546 // Do not pair any callee-save store/reload instructions in the
3547 // prologue/epilogue if the CFI information encoded the operations as separate
3548 // instructions, as that will cause the size of the actual prologue to mismatch
3549 // with the prologue size recorded in the Windows CFI.
3550 const MCAsmInfo &MAI = MI.getMF()->getTarget().getMCAsmInfo();
3551 bool NeedsWinCFI =
3552 MAI.usesWindowsCFI() && MI.getMF()->getFunction().needsUnwindTableEntry();
3553 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
3555 return false;
3556
3557 // On some CPUs quad load/store pairs are slower than two single load/stores.
3558 if (Subtarget.isPaired128Slow()) {
3559 switch (MI.getOpcode()) {
3560 default:
3561 break;
3562 case AArch64::LDURQi:
3563 case AArch64::STURQi:
3564 case AArch64::LDRQui:
3565 case AArch64::STRQui:
3566 return false;
3567 }
3568 }
3569
// No check rejected MI.
3570 return true;
3571}
3572
// Remainder of a signature plus body; the first signature lines are not
// present in this excerpt. Visible behavior: returns false when LdSt neither
// loads nor stores or when getMemOperandWithOffsetWidth fails; otherwise
// stores the queried width into Width, appends the single base operand to
// BaseOps and returns true. Offset and OffsetIsScalable are passed through to
// getMemOperandWithOffsetWidth.
3575 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3576 const TargetRegisterInfo *TRI) const {
3577 if (!LdSt.mayLoadOrStore())
3578 return false;
3579
3580 const MachineOperand *BaseOp;
3581 TypeSize WidthN(0, false);
3582 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3583 WidthN, TRI))
3584 return false;
3585 // The maximum vscale is 16 under AArch64, return the maximal extent for the
3586 // vector.
// NOTE(review): the statement below hands WidthN to LocationSize::precise
// unchanged; no vscale-based scaling is visible here — confirm whether the
// comment above still describes the code.
3587 Width = LocationSize::precise(WidthN);
3588 BaseOps.push_back(BaseOp);
3589 return true;
3590}
3591
// Builds an ExtAddrMode for MemI from its base operand and offset. The line
// carrying the function name is not present in this excerpt.
// Returns std::nullopt when getMemOperandWithOffset fails or when the base
// operand is not a register; otherwise the result has BaseReg set to the base
// register, Displacement set to the offset, and ScaledReg/Scale set to 0.
3592std::optional<ExtAddrMode>
3594 const TargetRegisterInfo *TRI) const {
3595 const MachineOperand *Base; // Filled with the base operand of MI.
3596 int64_t Offset; // Filled with the offset of MI.
3597 bool OffsetIsScalable;
3598 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3599 return std::nullopt;
3600
3601 if (!Base->isReg())
3602 return std::nullopt;
3603 ExtAddrMode AM;
3604 AM.BaseReg = Base->getReg();
3605 AM.Displacement = Offset;
3606 AM.ScaledReg = 0;
3607 AM.Scale = 0;
3608 return AM;
3609}
3610
// Remainder of a signature plus body; the first signature line is not present
// in this excerpt. Decides whether the address computation AddrI, which
// defines Reg, can be folded into the addressing mode of the memory
// instruction MemI. On success the new mode is written to AM and true is
// returned; otherwise false is returned.
// NOTE(review): several interior lines are absent from this excerpt (gaps in
// the embedded numbering), including the declarations of some names used
// below (e.g. Form, Extend) and at least one `if` condition. The added
// comments describe only what is visible.
3612 Register Reg,
3613 const MachineInstr &AddrI,
3614 ExtAddrMode &AM) const {
3615 // Filter out instructions into which we cannot fold.
// NumBytes is set per opcode group; OffsetScale stays 1 unless the group
// below assigns it (the *roX / *ui groups of 2 bytes and wider).
3616 unsigned NumBytes;
3617 int64_t OffsetScale = 1;
3618 switch (MemI.getOpcode()) {
3619 default:
3620 return false;
3621
3622 case AArch64::LDURQi:
3623 case AArch64::STURQi:
3624 NumBytes = 16;
3625 break;
3626
3627 case AArch64::LDURDi:
3628 case AArch64::STURDi:
3629 case AArch64::LDURXi:
3630 case AArch64::STURXi:
3631 NumBytes = 8;
3632 break;
3633
3634 case AArch64::LDURWi:
3635 case AArch64::LDURSWi:
3636 case AArch64::STURWi:
3637 NumBytes = 4;
3638 break;
3639
3640 case AArch64::LDURHi:
3641 case AArch64::STURHi:
3642 case AArch64::LDURHHi:
3643 case AArch64::STURHHi:
3644 case AArch64::LDURSHXi:
3645 case AArch64::LDURSHWi:
3646 NumBytes = 2;
3647 break;
3648
3649 case AArch64::LDRBroX:
3650 case AArch64::LDRBBroX:
3651 case AArch64::LDRSBXroX:
3652 case AArch64::LDRSBWroX:
3653 case AArch64::STRBroX:
3654 case AArch64::STRBBroX:
3655 case AArch64::LDURBi:
3656 case AArch64::LDURBBi:
3657 case AArch64::LDURSBXi:
3658 case AArch64::LDURSBWi:
3659 case AArch64::STURBi:
3660 case AArch64::STURBBi:
3661 case AArch64::LDRBui:
3662 case AArch64::LDRBBui:
3663 case AArch64::LDRSBXui:
3664 case AArch64::LDRSBWui:
3665 case AArch64::STRBui:
3666 case AArch64::STRBBui:
3667 NumBytes = 1;
3668 break;
3669
3670 case AArch64::LDRQroX:
3671 case AArch64::STRQroX:
3672 case AArch64::LDRQui:
3673 case AArch64::STRQui:
3674 NumBytes = 16;
3675 OffsetScale = 16;
3676 break;
3677
3678 case AArch64::LDRDroX:
3679 case AArch64::STRDroX:
3680 case AArch64::LDRXroX:
3681 case AArch64::STRXroX:
3682 case AArch64::LDRDui:
3683 case AArch64::STRDui:
3684 case AArch64::LDRXui:
3685 case AArch64::STRXui:
3686 NumBytes = 8;
3687 OffsetScale = 8;
3688 break;
3689
3690 case AArch64::LDRWroX:
3691 case AArch64::LDRSWroX:
3692 case AArch64::STRWroX:
3693 case AArch64::LDRWui:
3694 case AArch64::LDRSWui:
3695 case AArch64::STRWui:
3696 NumBytes = 4;
3697 OffsetScale = 4;
3698 break;
3699
3700 case AArch64::LDRHroX:
3701 case AArch64::STRHroX:
3702 case AArch64::LDRHHroX:
3703 case AArch64::STRHHroX:
3704 case AArch64::LDRSHXroX:
3705 case AArch64::LDRSHWroX:
3706 case AArch64::LDRHui:
3707 case AArch64::STRHui:
3708 case AArch64::LDRHHui:
3709 case AArch64::STRHHui:
3710 case AArch64::LDRSHXui:
3711 case AArch64::LDRSHWui:
3712 NumBytes = 2;
3713 OffsetScale = 2;
3714 break;
3715 }
3716
3717 // Check the fold operand is not the loaded/stored value.
3718 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3719 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3720 return false;
3721
3722 // Handle memory instructions with a [Reg, Reg] addressing mode.
3723 if (MemI.getOperand(2).isReg()) {
3724 // Bail if the addressing mode already includes extension of the offset
3725 // register.
3726 if (MemI.getOperand(3).getImm())
3727 return false;
3728
3729 // Check if we actually have a scaled offset.
3730 if (MemI.getOperand(4).getImm() == 0)
3731 OffsetScale = 1;
3732
3733 // If the address instruction is folded into the base register, then the
3734 // addressing mode must not have a scale. Then we can swap the base and the
3735 // scaled registers.
3736 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3737 return false;
3738
3739 switch (AddrI.getOpcode()) {
3740 default:
3741 return false;
3742
3743 case AArch64::SBFMXri:
3744 // sxtw Xa, Wm
3745 // ldr Xd, [Xn, Xa, lsl #N]
3746 // ->
3747 // ldr Xd, [Xn, Wm, sxtw #N]
// Only the SBFM form with immediates 0 and 31 is accepted.
3748 if (AddrI.getOperand(2).getImm() != 0 ||
3749 AddrI.getOperand(3).getImm() != 31)
3750 return false;
3751
// The base is whichever of MemI's two address registers is not Reg.
3752 AM.BaseReg = MemI.getOperand(1).getReg();
3753 if (AM.BaseReg == Reg)
3754 AM.BaseReg = MemI.getOperand(2).getReg();
3755 AM.ScaledReg = AddrI.getOperand(1).getReg();
3756 AM.Scale = OffsetScale;
3757 AM.Displacement = 0;
3759 return true;
3760
3761 case TargetOpcode::SUBREG_TO_REG: {
3762 // mov Wa, Wm
3763 // ldr Xd, [Xn, Xa, lsl #N]
3764 // ->
3765 // ldr Xd, [Xn, Wm, uxtw #N]
3766
3767 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3768 if (AddrI.getOperand(2).getImm() != AArch64::sub_32)
3769 return false;
3770
// The source must be a virtual register with exactly one non-debug use.
3771 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3772 Register OffsetReg = AddrI.getOperand(1).getReg();
3773 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3774 return false;
3775
// Its definition must be ORRWrs with WZR as operand 1 and a zero operand 3.
3776 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3777 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3778 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3779 DefMI.getOperand(3).getImm() != 0)
3780 return false;
3781
3782 AM.BaseReg = MemI.getOperand(1).getReg();
3783 if (AM.BaseReg == Reg)
3784 AM.BaseReg = MemI.getOperand(2).getReg();
3785 AM.ScaledReg = DefMI.getOperand(2).getReg();
3786 AM.Scale = OffsetScale;
3787 AM.Displacement = 0;
3789 return true;
3790 }
3791 }
3792 }
3793
3794 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3795
3796 // Check we are not breaking a potential conversion to an LDP.
// Returns true when no range applies to NumBytes, when OldOffset was already
// outside [MinOffset, MaxOffset], or when NewOffset is still inside it.
3797 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3798 int64_t NewOffset) -> bool {
3799 int64_t MinOffset, MaxOffset;
3800 switch (NumBytes) {
3801 default:
3802 return true;
3803 case 4:
3804 MinOffset = -256;
3805 MaxOffset = 252;
3806 break;
3807 case 8:
3808 MinOffset = -512;
3809 MaxOffset = 504;
3810 break;
3811 case 16:
3812 MinOffset = -1024;
3813 MaxOffset = 1008;
3814 break;
3815 }
3816 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3817 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3818 };
// Tries to fold an immediate displacement Disp into MemI's offset; fills AM
// with a base+displacement mode on success.
3819 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3820 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3821 int64_t NewOffset = OldOffset + Disp;
3822 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3823 return false;
3824 // If the old offset would fit into an LDP, but the new offset wouldn't,
3825 // bail out.
3826 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3827 return false;
3828 AM.BaseReg = AddrI.getOperand(1).getReg();
3829 AM.ScaledReg = 0;
3830 AM.Scale = 0;
3831 AM.Displacement = NewOffset;
3833 return true;
3834 };
3835
// Tries to fold a register addend with the given Scale; requires MemI's
// current immediate to be zero and Scale to be representable as unsigned.
3836 auto canFoldAddRegIntoAddrMode =
3837 [&](int64_t Scale,
3839 if (MemI.getOperand(2).getImm() != 0)
3840 return false;
3841 if ((unsigned)Scale != Scale)
3842 return false;
3843 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3844 return false;
3845 AM.BaseReg = AddrI.getOperand(1).getReg();
3846 AM.ScaledReg = AddrI.getOperand(2).getReg();
3847 AM.Scale = Scale;
3848 AM.Displacement = 0;
3849 AM.Form = Form;
3850 return true;
3851 };
3852
// True for STURQi/STRQui when the subtarget reports isSTRQroSlow().
3853 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3854 unsigned Opcode = MemI.getOpcode();
3855 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3856 Subtarget.isSTRQroSlow();
3857 };
3858
3859 int64_t Disp = 0;
3860 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3861 switch (AddrI.getOpcode()) {
3862 default:
3863 return false;
3864
3865 case AArch64::ADDXri:
3866 // add Xa, Xn, #N
3867 // ldr Xd, [Xa, #M]
3868 // ->
3869 // ldr Xd, [Xn, #N'+M]
// Operand 2 is shifted left by operand 3 to form the displacement.
3870 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3871 return canFoldAddSubImmIntoAddrMode(Disp);
3872
3873 case AArch64::SUBXri:
3874 // sub Xa, Xn, #N
3875 // ldr Xd, [Xa, #M]
3876 // ->
3877 // ldr Xd, [Xn, #N'+M]
// Same as ADDXri, but the displacement is negated.
3878 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3879 return canFoldAddSubImmIntoAddrMode(-Disp);
3880
3881 case AArch64::ADDXrs: {
3882 // add Xa, Xn, Xm, lsl #N
3883 // ldr Xd, [Xa]
3884 // ->
3885 // ldr Xd, [Xn, Xm, lsl #N]
3886
3887 // Don't fold the add if the result would be slower, unless optimising for
3888 // size.
3889 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3891 return false;
3892 Shift = AArch64_AM::getShiftValue(Shift);
3893 if (!OptSize) {
3894 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3895 return false;
3896 if (avoidSlowSTRQ(MemI))
3897 return false;
3898 }
3899 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3900 }
3901
3902 case AArch64::ADDXrr:
3903 // add Xa, Xn, Xm
3904 // ldr Xd, [Xa]
3905 // ->
3906 // ldr Xd, [Xn, Xm, lsl #0]
3907
3908 // Don't fold the add if the result would be slower, unless optimising for
3909 // size.
3910 if (!OptSize && avoidSlowSTRQ(MemI))
3911 return false;
3912 return canFoldAddRegIntoAddrMode(1);
3913
3914 case AArch64::ADDXrx:
3915 // add Xa, Xn, Wm, {s,u}xtw #N
3916 // ldr Xd, [Xa]
3917 // ->
3918 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3919
3920 // Don't fold the add if the result would be slower, unless optimising for
3921 // size.
3922 if (!OptSize && avoidSlowSTRQ(MemI))
3923 return false;
3924
3925 // Can fold only sign-/zero-extend of a word.
3926 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3928 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3929 return false;
3930
3931 return canFoldAddRegIntoAddrMode(
3932 1ULL << AArch64_AM::getArithShiftValue(Imm),
3935 }
3936}
3937
3938// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3939// return the opcode of an instruction performing the same operation, but using
3940// the [Reg, Reg] addressing mode.
3941static unsigned regOffsetOpcode(unsigned Opcode) {
3942 switch (Opcode) {
3943 default:
3944 llvm_unreachable("Address folding not implemented for instruction");
3945
3946 case AArch64::LDURQi:
3947 case AArch64::LDRQui:
3948 return AArch64::LDRQroX;
3949 case AArch64::STURQi:
3950 case AArch64::STRQui:
3951 return AArch64::STRQroX;
3952 case AArch64::LDURDi:
3953 case AArch64::LDRDui:
3954 return AArch64::LDRDroX;
3955 case AArch64::STURDi:
3956 case AArch64::STRDui:
3957 return AArch64::STRDroX;
3958 case AArch64::LDURXi:
3959 case AArch64::LDRXui:
3960 return AArch64::LDRXroX;
3961 case AArch64::STURXi:
3962 case AArch64::STRXui:
3963 return AArch64::STRXroX;
3964 case AArch64::LDURWi:
3965 case AArch64::LDRWui:
3966 return AArch64::LDRWroX;
3967 case AArch64::LDURSWi:
3968 case AArch64::LDRSWui:
3969 return AArch64::LDRSWroX;
3970 case AArch64::STURWi:
3971 case AArch64::STRWui:
3972 return AArch64::STRWroX;
3973 case AArch64::LDURHi:
3974 case AArch64::LDRHui:
3975 return AArch64::LDRHroX;
3976 case AArch64::STURHi:
3977 case AArch64::STRHui:
3978 return AArch64::STRHroX;
3979 case AArch64::LDURHHi:
3980 case AArch64::LDRHHui:
3981 return AArch64::LDRHHroX;
3982 case AArch64::STURHHi:
3983 case AArch64::STRHHui:
3984 return AArch64::STRHHroX;
3985 case AArch64::LDURSHXi:
3986 case AArch64::LDRSHXui:
3987 return AArch64::LDRSHXroX;
3988 case AArch64::LDURSHWi:
3989 case AArch64::LDRSHWui:
3990 return AArch64::LDRSHWroX;
3991 case AArch64::LDURBi:
3992 case AArch64::LDRBui:
3993 return AArch64::LDRBroX;
3994 case AArch64::LDURBBi:
3995 case AArch64::LDRBBui:
3996 return AArch64::LDRBBroX;
3997 case AArch64::LDURSBXi:
3998 case AArch64::LDRSBXui:
3999 return AArch64::LDRSBXroX;
4000 case AArch64::LDURSBWi:
4001 case AArch64::LDRSBWui:
4002 return AArch64::LDRSBWroX;
4003 case AArch64::STURBi:
4004 case AArch64::STRBui:
4005 return AArch64::STRBroX;
4006 case AArch64::STURBBi:
4007 case AArch64::STRBBui:
4008 return AArch64::STRBBroX;
4009 }
4010}
4011
4012// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
4013// the opcode of an instruction performing the same operation, but using the
4014// [Reg, #Imm] addressing mode with scaled offset.
4015unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
4016 switch (Opcode) {
4017 default:
4018 llvm_unreachable("Address folding not implemented for instruction");
4019
4020 case AArch64::LDURQi:
4021 Scale = 16;
4022 return AArch64::LDRQui;
4023 case AArch64::STURQi:
4024 Scale = 16;
4025 return AArch64::STRQui;
4026 case AArch64::LDURDi:
4027 Scale = 8;
4028 return AArch64::LDRDui;
4029 case AArch64::STURDi:
4030 Scale = 8;
4031 return AArch64::STRDui;
4032 case AArch64::LDURXi:
4033 Scale = 8;
4034 return AArch64::LDRXui;
4035 case AArch64::STURXi:
4036 Scale = 8;
4037 return AArch64::STRXui;
4038 case AArch64::LDURWi:
4039 Scale = 4;
4040 return AArch64::LDRWui;
4041 case AArch64::LDURSWi:
4042 Scale = 4;
4043 return AArch64::LDRSWui;
4044 case AArch64::STURWi:
4045 Scale = 4;
4046 return AArch64::STRWui;
4047 case AArch64::LDURHi:
4048 Scale = 2;
4049 return AArch64::LDRHui;
4050 case AArch64::STURHi:
4051 Scale = 2;
4052 return AArch64::STRHui;
4053 case AArch64::LDURHHi:
4054 Scale = 2;
4055 return AArch64::LDRHHui;
4056 case AArch64::STURHHi:
4057 Scale = 2;
4058 return AArch64::STRHHui;
4059 case AArch64::LDURSHXi:
4060 Scale = 2;
4061 return AArch64::LDRSHXui;
4062 case AArch64::LDURSHWi:
4063 Scale = 2;
4064 return AArch64::LDRSHWui;
4065 case AArch64::LDURBi:
4066 Scale = 1;
4067 return AArch64::LDRBui;
4068 case AArch64::LDURBBi:
4069 Scale = 1;
4070 return AArch64::LDRBBui;
4071 case AArch64::LDURSBXi:
4072 Scale = 1;
4073 return AArch64::LDRSBXui;
4074 case AArch64::LDURSBWi:
4075 Scale = 1;
4076 return AArch64::LDRSBWui;
4077 case AArch64::STURBi:
4078 Scale = 1;
4079 return AArch64::STRBui;
4080 case AArch64::STURBBi:
4081 Scale = 1;
4082 return AArch64::STRBBui;
4083 case AArch64::LDRQui:
4084 case AArch64::STRQui:
4085 Scale = 16;
4086 return Opcode;
4087 case AArch64::LDRDui:
4088 case AArch64::STRDui:
4089 case AArch64::LDRXui:
4090 case AArch64::STRXui:
4091 Scale = 8;
4092 return Opcode;
4093 case AArch64::LDRWui:
4094 case AArch64::LDRSWui:
4095 case AArch64::STRWui:
4096 Scale = 4;
4097 return Opcode;
4098 case AArch64::LDRHui:
4099 case AArch64::STRHui:
4100 case AArch64::LDRHHui:
4101 case AArch64::STRHHui:
4102 case AArch64::LDRSHXui:
4103 case AArch64::LDRSHWui:
4104 Scale = 2;
4105 return Opcode;
4106 case AArch64::LDRBui:
4107 case AArch64::LDRBBui:
4108 case AArch64::LDRSBXui:
4109 case AArch64::LDRSBWui:
4110 case AArch64::STRBui:
4111 case AArch64::STRBBui:
4112 Scale = 1;
4113 return Opcode;
4114 }
4115}
4116
4117// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
4118// the opcode of an instruction performing the same operation, but using the
4119// [Reg, #Imm] addressing mode with unscaled offset.
4120unsigned unscaledOffsetOpcode(unsigned Opcode) {
4121 switch (Opcode) {
4122 default:
4123 llvm_unreachable("Address folding not implemented for instruction");
4124
4125 case AArch64::LDURQi:
4126 case AArch64::STURQi:
4127 case AArch64::LDURDi:
4128 case AArch64::STURDi:
4129 case AArch64::LDURXi:
4130 case AArch64::STURXi:
4131 case AArch64::LDURWi:
4132 case AArch64::LDURSWi:
4133 case AArch64::STURWi:
4134 case AArch64::LDURHi:
4135 case AArch64::STURHi:
4136 case AArch64::LDURHHi:
4137 case AArch64::STURHHi:
4138 case AArch64::LDURSHXi:
4139 case AArch64::LDURSHWi:
4140 case AArch64::LDURBi:
4141 case AArch64::STURBi:
4142 case AArch64::LDURBBi:
4143 case AArch64::STURBBi:
4144 case AArch64::LDURSBWi:
4145 case AArch64::LDURSBXi:
4146 return Opcode;
4147 case AArch64::LDRQui:
4148 return AArch64::LDURQi;
4149 case AArch64::STRQui:
4150 return AArch64::STURQi;
4151 case AArch64::LDRDui:
4152 return AArch64::LDURDi;
4153 case AArch64::STRDui:
4154 return AArch64::STURDi;
4155 case AArch64::LDRXui:
4156 return AArch64::LDURXi;
4157 case AArch64::STRXui:
4158 return AArch64::STURXi;
4159 case AArch64::LDRWui:
4160 return AArch64::LDURWi;
4161 case AArch64::LDRSWui:
4162 return AArch64::LDURSWi;
4163 case AArch64::STRWui:
4164 return AArch64::STURWi;
4165 case AArch64::LDRHui:
4166 return AArch64::LDURHi;
4167 case AArch64::STRHui:
4168 return AArch64::STURHi;
4169 case AArch64::LDRHHui:
4170 return AArch64::LDURHHi;
4171 case AArch64::STRHHui:
4172 return AArch64::STURHHi;
4173 case AArch64::LDRSHXui:
4174 return AArch64::LDURSHXi;
4175 case AArch64::LDRSHWui:
4176 return AArch64::LDURSHWi;
4177 case AArch64::LDRBBui:
4178 return AArch64::LDURBBi;
4179 case AArch64::LDRBui:
4180 return AArch64::LDURBi;
4181 case AArch64::STRBBui:
4182 return AArch64::STURBBi;
4183 case AArch64::STRBui:
4184 return AArch64::STURBi;
4185 case AArch64::LDRSBWui:
4186 return AArch64::LDURSBWi;
4187 case AArch64::LDRSBXui:
4188 return AArch64::LDURSBXi;
4189 }
4190}
4191
4192// Given the opcode of a memory load/store instruction, return the opcode of an
4193// instruction performing the same operation, but using
4194// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
4195// offset register.
4196static unsigned offsetExtendOpcode(unsigned Opcode) {
4197 switch (Opcode) {
4198 default:
4199 llvm_unreachable("Address folding not implemented for instruction");
4200
4201 case AArch64::LDRQroX:
4202 case AArch64::LDURQi:
4203 case AArch64::LDRQui:
4204 return AArch64::LDRQroW;
4205 case AArch64::STRQroX:
4206 case AArch64::STURQi:
4207 case AArch64::STRQui:
4208 return AArch64::STRQroW;
4209 case AArch64::LDRDroX:
4210 case AArch64::LDURDi:
4211 case AArch64::LDRDui:
4212 return AArch64::LDRDroW;
4213 case AArch64::STRDroX:
4214 case AArch64::STURDi:
4215 case AArch64::STRDui:
4216 return AArch64::STRDroW;
4217 case AArch64::LDRXroX:
4218 case AArch64::LDURXi:
4219 case AArch64::LDRXui:
4220 return AArch64::LDRXroW;
4221 case AArch64::STRXroX:
4222 case AArch64::STURXi:
4223 case AArch64::STRXui:
4224 return AArch64::STRXroW;
4225 case AArch64::LDRWroX:
4226 case AArch64::LDURWi:
4227 case AArch64::LDRWui:
4228 return AArch64::LDRWroW;
4229 case AArch64::LDRSWroX:
4230 case AArch64::LDURSWi:
4231 case AArch64::LDRSWui:
4232 return AArch64::LDRSWroW;
4233 case AArch64::STRWroX:
4234 case AArch64::STURWi:
4235 case AArch64::STRWui:
4236 return AArch64::STRWroW;
4237 case AArch64::LDRHroX:
4238 case AArch64::LDURHi:
4239 case AArch64::LDRHui:
4240 return AArch64::LDRHroW;
4241 case AArch64::STRHroX:
4242 case AArch64::STURHi:
4243 case AArch64::STRHui:
4244 return AArch64::STRHroW;
4245 case AArch64::LDRHHroX:
4246 case AArch64::LDURHHi:
4247 case AArch64::LDRHHui:
4248 return AArch64::LDRHHroW;
4249 case AArch64::STRHHroX:
4250 case AArch64::STURHHi:
4251 case AArch64::STRHHui:
4252 return AArch64::STRHHroW;
4253 case AArch64::LDRSHXroX:
4254 case AArch64::LDURSHXi:
4255 case AArch64::LDRSHXui:
4256 return AArch64::LDRSHXroW;
4257 case AArch64::LDRSHWroX:
4258 case AArch64::LDURSHWi:
4259 case AArch64::LDRSHWui:
4260 return AArch64::LDRSHWroW;
4261 case AArch64::LDRBroX:
4262 case AArch64::LDURBi:
4263 case AArch64::LDRBui:
4264 return AArch64::LDRBroW;
4265 case AArch64::LDRBBroX:
4266 case AArch64::LDURBBi:
4267 case AArch64::LDRBBui:
4268 return AArch64::LDRBBroW;
4269 case AArch64::LDRSBXroX:
4270 case AArch64::LDURSBXi:
4271 case AArch64::LDRSBXui:
4272 return AArch64::LDRSBXroW;
4273 case AArch64::LDRSBWroX:
4274 case AArch64::LDURSBWi:
4275 case AArch64::LDRSBWui:
4276 return AArch64::LDRSBWroW;
4277 case AArch64::STRBroX:
4278 case AArch64::STURBi:
4279 case AArch64::STRBui:
4280 return AArch64::STRBroW;
4281 case AArch64::STRBBroX:
4282 case AArch64::STURBBi:
4283 case AArch64::STRBBui:
4284 return AArch64::STRBBroW;
4285 }
4286}
4287
4289 const ExtAddrMode &AM) const {
// NOTE(review): the embedded listing numbers skip 4288, 4295, 4333-4334, 4353
// and 4361, so the first signature line, the guards that open the two
// top-level branches, one operand of the last builder chain and the call that
// owns the final message string are not visible here. The comments below
// describe only the statements that are present.
//
// Every visible path builds a new instruction with BuildMI at MemI's position
// in its parent block. The new instruction reuses MemI's operand 0, memory
// operands and flags, takes its address operands from AM, and is returned to
// the caller. MemI itself is not erased by the visible code.
4290
4291 const DebugLoc &DL = MemI.getDebugLoc();
4292 MachineBasicBlock &MBB = *MemI.getParent();
4293 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
4294
4296 if (AM.ScaledReg) {
4297 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
4298 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
4299 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
// Operand 0 is marked as a def only when MemI is a load. The two trailing
// immediates are a constant 0 and a 0/1 flag derived from AM.Scale > 1;
// presumably these are the extend and shift selectors of the register-offset
// form — TODO confirm against the instruction definition.
4300 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4301 .addReg(MemI.getOperand(0).getReg(),
4302 getDefRegState(MemI.mayLoad()))
4303 .addReg(AM.BaseReg)
4304 .addReg(AM.ScaledReg)
4305 .addImm(0)
4306 .addImm(AM.Scale > 1)
4307 .setMemRefs(MemI.memoperands())
4308 .setMIFlags(MemI.getFlags());
4309 return B.getInstr();
4310 }
4311
4312 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
4313 "Addressing mode not supported for folding");
4314
4315 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
// A displacement that fits a signed 9-bit immediate selects the unscaled
// opcode and leaves Scale at 1; otherwise the scaled opcode is selected and
// scaledOffsetOpcode overwrites Scale.
4316 unsigned Scale = 1;
4317 unsigned Opcode = MemI.getOpcode();
4318 if (isInt<9>(AM.Displacement))
4319 Opcode = unscaledOffsetOpcode(Opcode);
4320 else
4321 Opcode = scaledOffsetOpcode(Opcode, Scale);
4322
// NOTE(review): the immediate is Displacement / Scale using integer division;
// this assumes the caller only passes displacements that are a multiple of
// Scale and in range for the chosen opcode — confirm against the caller.
4323 auto B =
4324 BuildMI(MBB, MemI, DL, get(Opcode))
4325 .addReg(MemI.getOperand(0).getReg(), getDefRegState(MemI.mayLoad()))
4326 .addReg(AM.BaseReg)
4327 .addImm(AM.Displacement / Scale)
4328 .setMemRefs(MemI.memoperands())
4329 .setMIFlags(MemI.getFlags());
4330 return B.getInstr();
4331 }
4332
4335 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
4336 assert(AM.ScaledReg && !AM.Displacement &&
4337 "Address offset can be a register or an immediate, but not both");
4338 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
4339 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4340 // Make sure the offset register is in the correct register class.
// When the offset register's class has GPR64 as a (super)class, its sub_32
// subregister is copied into a fresh GPR32 virtual register, and that copy is
// used as the offset operand instead.
4341 Register OffsetReg = AM.ScaledReg;
4342 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
4343 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
4344 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4345 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
4346 .addReg(AM.ScaledReg, {}, AArch64::sub_32);
4347 }
4348 auto B =
4349 BuildMI(MBB, MemI, DL, get(Opcode))
4350 .addReg(MemI.getOperand(0).getReg(), getDefRegState(MemI.mayLoad()))
4351 .addReg(AM.BaseReg)
4352 .addReg(OffsetReg)
4354 .addImm(AM.Scale != 1)
4355 .setMemRefs(MemI.memoperands())
4356 .setMIFlags(MemI.getFlags());
4357
4358 return B.getInstr();
4359 }
4360
4362 "Function must not be called with an addressing mode it can't handle");
4363}
4364
4365/// Return true if the opcode is a post-index ld/st instruction, which really
4366/// loads from base+0.
4367static bool isPostIndexLdStOpcode(unsigned Opcode) {
4368 switch (Opcode) {
4369 default:
4370 return false;
4371 case AArch64::LD1Fourv16b_POST:
4372 case AArch64::LD1Fourv1d_POST:
4373 case AArch64::LD1Fourv2d_POST:
4374 case AArch64::LD1Fourv2s_POST:
4375 case AArch64::LD1Fourv4h_POST:
4376 case AArch64::LD1Fourv4s_POST:
4377 case AArch64::LD1Fourv8b_POST:
4378 case AArch64::LD1Fourv8h_POST:
4379 case AArch64::LD1Onev16b_POST:
4380 case AArch64::LD1Onev1d_POST:
4381 case AArch64::LD1Onev2d_POST:
4382 case AArch64::LD1Onev2s_POST:
4383 case AArch64::LD1Onev4h_POST:
4384 case AArch64::LD1Onev4s_POST:
4385 case AArch64::LD1Onev8b_POST:
4386 case AArch64::LD1Onev8h_POST:
4387 case AArch64::LD1Rv16b_POST:
4388 case AArch64::LD1Rv1d_POST:
4389 case AArch64::LD1Rv2d_POST:
4390 case AArch64::LD1Rv2s_POST:
4391 case AArch64::LD1Rv4h_POST:
4392 case AArch64::LD1Rv4s_POST:
4393 case AArch64::LD1Rv8b_POST:
4394 case AArch64::LD1Rv8h_POST:
4395 case AArch64::LD1Threev16b_POST:
4396 case AArch64::LD1Threev1d_POST:
4397 case AArch64::LD1Threev2d_POST:
4398 case AArch64::LD1Threev2s_POST:
4399 case AArch64::LD1Threev4h_POST:
4400 case AArch64::LD1Threev4s_POST:
4401 case AArch64::LD1Threev8b_POST:
4402 case AArch64::LD1Threev8h_POST:
4403 case AArch64::LD1Twov16b_POST:
4404 case AArch64::LD1Twov1d_POST:
4405 case AArch64::LD1Twov2d_POST:
4406 case AArch64::LD1Twov2s_POST:
4407 case AArch64::LD1Twov4h_POST:
4408 case AArch64::LD1Twov4s_POST:
4409 case AArch64::LD1Twov8b_POST:
4410 case AArch64::LD1Twov8h_POST:
4411 case AArch64::LD1i16_POST:
4412 case AArch64::LD1i32_POST:
4413 case AArch64::LD1i64_POST:
4414 case AArch64::LD1i8_POST:
4415 case AArch64::LD2Rv16b_POST:
4416 case AArch64::LD2Rv1d_POST:
4417 case AArch64::LD2Rv2d_POST:
4418 case AArch64::LD2Rv2s_POST:
4419 case AArch64::LD2Rv4h_POST:
4420 case AArch64::LD2Rv4s_POST:
4421 case AArch64::LD2Rv8b_POST:
4422 case AArch64::LD2Rv8h_POST:
4423 case AArch64::LD2Twov16b_POST:
4424 case AArch64::LD2Twov2d_POST:
4425 case AArch64::LD2Twov2s_POST:
4426 case AArch64::LD2Twov4h_POST:
4427 case AArch64::LD2Twov4s_POST:
4428 case AArch64::LD2Twov8b_POST:
4429 case AArch64::LD2Twov8h_POST:
4430 case AArch64::LD2i16_POST:
4431 case AArch64::LD2i32_POST:
4432 case AArch64::LD2i64_POST:
4433 case AArch64::LD2i8_POST:
4434 case AArch64::LD3Rv16b_POST:
4435 case AArch64::LD3Rv1d_POST:
4436 case AArch64::LD3Rv2d_POST:
4437 case AArch64::LD3Rv2s_POST:
4438 case AArch64::LD3Rv4h_POST:
4439 case AArch64::LD3Rv4s_POST:
4440 case AArch64::LD3Rv8b_POST:
4441 case AArch64::LD3Rv8h_POST:
4442 case AArch64::LD3Threev16b_POST:
4443 case AArch64::LD3Threev2d_POST:
4444 case AArch64::LD3Threev2s_POST:
4445 case AArch64::LD3Threev4h_POST:
4446 case AArch64::LD3Threev4s_POST:
4447 case AArch64::LD3Threev8b_POST:
4448 case AArch64::LD3Threev8h_POST:
4449 case AArch64::LD3i16_POST:
4450 case AArch64::LD3i32_POST:
4451 case AArch64::LD3i64_POST:
4452 case AArch64::LD3i8_POST:
4453 case AArch64::LD4Fourv16b_POST:
4454 case AArch64::LD4Fourv2d_POST:
4455 case AArch64::LD4Fourv2s_POST:
4456 case AArch64::LD4Fourv4h_POST:
4457 case AArch64::LD4Fourv4s_POST:
4458 case AArch64::LD4Fourv8b_POST:
4459 case AArch64::LD4Fourv8h_POST:
4460 case AArch64::LD4Rv16b_POST:
4461 case AArch64::LD4Rv1d_POST:
4462 case AArch64::LD4Rv2d_POST:
4463 case AArch64::LD4Rv2s_POST:
4464 case AArch64::LD4Rv4h_POST:
4465 case AArch64::LD4Rv4s_POST:
4466 case AArch64::LD4Rv8b_POST:
4467 case AArch64::LD4Rv8h_POST:
4468 case AArch64::LD4i16_POST:
4469 case AArch64::LD4i32_POST:
4470 case AArch64::LD4i64_POST:
4471 case AArch64::LD4i8_POST:
4472 case AArch64::LDAPRWpost:
4473 case AArch64::LDAPRXpost:
4474 case AArch64::LDIAPPWpost:
4475 case AArch64::LDIAPPXpost:
4476 case AArch64::LDPDpost:
4477 case AArch64::LDPQpost:
4478 case AArch64::LDPSWpost:
4479 case AArch64::LDPSpost:
4480 case AArch64::LDPWpost:
4481 case AArch64::LDPXpost:
4482 case AArch64::LDRBBpost:
4483 case AArch64::LDRBpost:
4484 case AArch64::LDRDpost:
4485 case AArch64::LDRHHpost:
4486 case AArch64::LDRHpost:
4487 case AArch64::LDRQpost:
4488 case AArch64::LDRSBWpost:
4489 case AArch64::LDRSBXpost:
4490 case AArch64::LDRSHWpost:
4491 case AArch64::LDRSHXpost:
4492 case AArch64::LDRSWpost:
4493 case AArch64::LDRSpost:
4494 case AArch64::LDRWpost:
4495 case AArch64::LDRXpost:
4496 case AArch64::ST1Fourv16b_POST:
4497 case AArch64::ST1Fourv1d_POST:
4498 case AArch64::ST1Fourv2d_POST:
4499 case AArch64::ST1Fourv2s_POST:
4500 case AArch64::ST1Fourv4h_POST:
4501 case AArch64::ST1Fourv4s_POST:
4502 case AArch64::ST1Fourv8b_POST:
4503 case AArch64::ST1Fourv8h_POST:
4504 case AArch64::ST1Onev16b_POST:
4505 case AArch64::ST1Onev1d_POST:
4506 case AArch64::ST1Onev2d_POST:
4507 case AArch64::ST1Onev2s_POST:
4508 case AArch64::ST1Onev4h_POST:
4509 case AArch64::ST1Onev4s_POST:
4510 case AArch64::ST1Onev8b_POST:
4511 case AArch64::ST1Onev8h_POST:
4512 case AArch64::ST1Threev16b_POST:
4513 case AArch64::ST1Threev1d_POST:
4514 case AArch64::ST1Threev2d_POST:
4515 case AArch64::ST1Threev2s_POST:
4516 case AArch64::ST1Threev4h_POST:
4517 case AArch64::ST1Threev4s_POST:
4518 case AArch64::ST1Threev8b_POST:
4519 case AArch64::ST1Threev8h_POST:
4520 case AArch64::ST1Twov16b_POST:
4521 case AArch64::ST1Twov1d_POST:
4522 case AArch64::ST1Twov2d_POST:
4523 case AArch64::ST1Twov2s_POST:
4524 case AArch64::ST1Twov4h_POST:
4525 case AArch64::ST1Twov4s_POST:
4526 case AArch64::ST1Twov8b_POST:
4527 case AArch64::ST1Twov8h_POST:
4528 case AArch64::ST1i16_POST:
4529 case AArch64::ST1i32_POST:
4530 case AArch64::ST1i64_POST:
4531 case AArch64::ST1i8_POST:
4532 case AArch64::ST2GPostIndex:
4533 case AArch64::ST2Twov16b_POST:
4534 case AArch64::ST2Twov2d_POST:
4535 case AArch64::ST2Twov2s_POST:
4536 case AArch64::ST2Twov4h_POST:
4537 case AArch64::ST2Twov4s_POST:
4538 case AArch64::ST2Twov8b_POST:
4539 case AArch64::ST2Twov8h_POST:
4540 case AArch64::ST2i16_POST:
4541 case AArch64::ST2i32_POST:
4542 case AArch64::ST2i64_POST:
4543 case AArch64::ST2i8_POST:
4544 case AArch64::ST3Threev16b_POST:
4545 case AArch64::ST3Threev2d_POST:
4546 case AArch64::ST3Threev2s_POST:
4547 case AArch64::ST3Threev4h_POST:
4548 case AArch64::ST3Threev4s_POST:
4549 case AArch64::ST3Threev8b_POST:
4550 case AArch64::ST3Threev8h_POST:
4551 case AArch64::ST3i16_POST:
4552 case AArch64::ST3i32_POST:
4553 case AArch64::ST3i64_POST:
4554 case AArch64::ST3i8_POST:
4555 case AArch64::ST4Fourv16b_POST:
4556 case AArch64::ST4Fourv2d_POST:
4557 case AArch64::ST4Fourv2s_POST:
4558 case AArch64::ST4Fourv4h_POST:
4559 case AArch64::ST4Fourv4s_POST:
4560 case AArch64::ST4Fourv8b_POST:
4561 case AArch64::ST4Fourv8h_POST:
4562 case AArch64::ST4i16_POST:
4563 case AArch64::ST4i32_POST:
4564 case AArch64::ST4i64_POST:
4565 case AArch64::ST4i8_POST:
4566 case AArch64::STGPostIndex:
4567 case AArch64::STGPpost:
4568 case AArch64::STPDpost:
4569 case AArch64::STPQpost:
4570 case AArch64::STPSpost:
4571 case AArch64::STPWpost:
4572 case AArch64::STPXpost:
4573 case AArch64::STRBBpost:
4574 case AArch64::STRBpost:
4575 case AArch64::STRDpost:
4576 case AArch64::STRHHpost:
4577 case AArch64::STRHpost:
4578 case AArch64::STRQpost:
4579 case AArch64::STRSpost:
4580 case AArch64::STRWpost:
4581 case AArch64::STRXpost:
4582 case AArch64::STZ2GPostIndex:
4583 case AArch64::STZGPostIndex:
4584 return true;
4585 }
4586}
4587
4589 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4590 bool &OffsetIsScalable, TypeSize &Width,
4591 const TargetRegisterInfo *TRI) const {
// NOTE(review): listing line 4588 (the start of this signature) is absent from
// this extract; the comments below describe only the visible body.
//
// Splits the load/store LdSt into a base operand plus immediate offset.
// On success the outputs are:
//   BaseOp           - pointer to the base operand (a register or frame index)
//   Offset           - immediate operand multiplied by the opcode's scale,
//                      or 0 for post-index opcodes
//   OffsetIsScalable - whether that scale is a scalable quantity
//   Width            - width reported by getMemOpInfo for the opcode
// Returns false when the operand layout or the opcode is not handled.
// TRI is not referenced by the visible body.
4592 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4593 // Handle only loads/stores with base register followed by immediate offset.
4594 if (LdSt.getNumExplicitOperands() == 3) {
4595 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4596 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4597 !LdSt.getOperand(2).isImm())
4598 return false;
4599 } else if (LdSt.getNumExplicitOperands() == 4) {
4600 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4601 if (!LdSt.getOperand(1).isReg() ||
4602 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4603 !LdSt.getOperand(3).isImm())
4604 return false;
4605 } else
4606 return false;
4607
4608 // Get the scaling factor for the instruction and set the width for the
4609 // instruction.
4610 TypeSize Scale(0U, false);
// Dummy1/Dummy2 receive the opcode's minimum and maximum offsets, which are
// not needed here.
4611 int64_t Dummy1, Dummy2;
4612
4613 // If this returns false, then it's an instruction we don't want to handle.
4614 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4615 return false;
4616
4617 // Compute the offset. Offset is calculated as the immediate operand
4618 // multiplied by the scaling factor. Unscaled instructions have scaling factor
4619 // set to 1. Postindex are a special case which have an offset of 0.
4620 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
// Post-index opcodes take their base from operand 2 regardless of the
// explicit operand count checked above.
4621 BaseOp = &LdSt.getOperand(2);
4622 Offset = 0;
4623 } else if (LdSt.getNumExplicitOperands() == 3) {
4624 BaseOp = &LdSt.getOperand(1);
4625 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4626 } else {
4627 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4628 BaseOp = &LdSt.getOperand(2);
4629 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4630 }
4631 OffsetIsScalable = Scale.isScalable();
4632
// Succeed only when the selected base operand is a register or a frame index.
4633 return BaseOp->isReg() || BaseOp->isFI();
4634}
4635
// NOTE(review): listing lines 4636-4637 (this function's signature) are absent
// from this extract; only the body is visible.
//
// Returns a reference to the last explicit operand of the memory instruction
// LdSt. That operand is asserted to be an immediate.
4638 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4639 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4640 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4641 return OfsOp;
4642}
4643
4644bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4645 TypeSize &Width, int64_t &MinOffset,
4646 int64_t &MaxOffset) {
4647 switch (Opcode) {
4648 // Not a memory operation or something we want to handle.
4649 default:
4650 Scale = Width = TypeSize::getFixed(0);
4651 MinOffset = MaxOffset = 0;
4652 return false;
4653 // LDR / STR
4654 case AArch64::LDRQui:
4655 case AArch64::STRQui:
4656 Scale = Width = TypeSize::getFixed(16);
4657 MinOffset = 0;
4658 MaxOffset = 4095;
4659 break;
4660 case AArch64::LDRXui:
4661 case AArch64::LDRDui:
4662 case AArch64::STRXui:
4663 case AArch64::STRDui:
4664 case AArch64::PRFMui:
4665 Scale = Width = TypeSize::getFixed(8);
4666 MinOffset = 0;
4667 MaxOffset = 4095;
4668 break;
4669 case AArch64::LDRWui:
4670 case AArch64::LDRSui:
4671 case AArch64::LDRSWui:
4672 case AArch64::STRWui:
4673 case AArch64::STRSui:
4674 Scale = Width = TypeSize::getFixed(4);
4675 MinOffset = 0;
4676 MaxOffset = 4095;
4677 break;
4678 case AArch64::LDRHui:
4679 case AArch64::LDRHHui:
4680 case AArch64::LDRSHWui:
4681 case AArch64::LDRSHXui:
4682 case AArch64::STRHui:
4683 case AArch64::STRHHui:
4684 Scale = Width = TypeSize::getFixed(2);
4685 MinOffset = 0;
4686 MaxOffset = 4095;
4687 break;
4688 case AArch64::LDRBui:
4689 case AArch64::LDRBBui:
4690 case AArch64::LDRSBWui:
4691 case AArch64::LDRSBXui:
4692 case AArch64::STRBui:
4693 case AArch64::STRBBui:
4694 Scale = Width = TypeSize::getFixed(1);
4695 MinOffset = 0;
4696 MaxOffset = 4095;
4697 break;
4698 // post/pre inc
4699 case AArch64::STRQpre:
4700 case AArch64::LDRQpost:
4701 Scale = TypeSize::getFixed(1);
4702 Width = TypeSize::getFixed(16);
4703 MinOffset = -256;
4704 MaxOffset = 255;
4705 break;
4706 case AArch64::LDRDpost:
4707 case AArch64::LDRDpre:
4708 case AArch64::LDRXpost:
4709 case AArch64::LDRXpre:
4710 case AArch64::STRDpost:
4711 case AArch64::STRDpre:
4712 case AArch64::STRXpost:
4713 case AArch64::STRXpre:
4714 Scale = TypeSize::getFixed(1);
4715 Width = TypeSize::getFixed(8);
4716 MinOffset = -256;
4717 MaxOffset = 255;
4718 break;
4719 case AArch64::STRWpost:
4720 case AArch64::STRWpre:
4721 case AArch64::LDRWpost:
4722 case AArch64::LDRWpre:
4723 case AArch64::STRSpost:
4724 case AArch64::STRSpre:
4725 case AArch64::LDRSpost:
4726 case AArch64::LDRSpre:
4727 Scale = TypeSize::getFixed(1);
4728 Width = TypeSize::getFixed(4);
4729 MinOffset = -256;
4730 MaxOffset = 255;
4731 break;
4732 case AArch64::LDRHpost:
4733 case AArch64::LDRHpre:
4734 case AArch64::STRHpost:
4735 case AArch64::STRHpre:
4736 case AArch64::LDRHHpost:
4737 case AArch64::LDRHHpre:
4738 case AArch64::STRHHpost:
4739 case AArch64::STRHHpre:
4740 Scale = TypeSize::getFixed(1);
4741 Width = TypeSize::getFixed(2);
4742 MinOffset = -256;
4743 MaxOffset = 255;
4744 break;
4745 case AArch64::LDRBpost:
4746 case AArch64::LDRBpre:
4747 case AArch64::STRBpost:
4748 case AArch64::STRBpre:
4749 case AArch64::LDRBBpost:
4750 case AArch64::LDRBBpre:
4751 case AArch64::STRBBpost:
4752 case AArch64::STRBBpre:
4753 Scale = Width = TypeSize::getFixed(1);
4754 MinOffset = -256;
4755 MaxOffset = 255;
4756 break;
4757 // Unscaled
4758 case AArch64::LDURQi:
4759 case AArch64::STURQi:
4760 Scale = TypeSize::getFixed(1);
4761 Width = TypeSize::getFixed(16);
4762 MinOffset = -256;
4763 MaxOffset = 255;
4764 break;
4765 case AArch64::LDURXi:
4766 case AArch64::LDURDi:
4767 case AArch64::LDAPURXi:
4768 case AArch64::STURXi:
4769 case AArch64::STURDi:
4770 case AArch64::STLURXi:
4771 case AArch64::PRFUMi:
4772 Scale = TypeSize::getFixed(1);
4773 Width = TypeSize::getFixed(8);
4774 MinOffset = -256;
4775 MaxOffset = 255;
4776 break;
4777 case AArch64::LDURWi:
4778 case AArch64::LDURSi:
4779 case AArch64::LDURSWi:
4780 case AArch64::LDAPURi:
4781 case AArch64::LDAPURSWi:
4782 case AArch64::STURWi:
4783 case AArch64::STURSi:
4784 case AArch64::STLURWi:
4785 Scale = TypeSize::getFixed(1);
4786 Width = TypeSize::getFixed(4);
4787 MinOffset = -256;
4788 MaxOffset = 255;
4789 break;
4790 case AArch64::LDURHi:
4791 case AArch64::LDURHHi:
4792 case AArch64::LDURSHXi:
4793 case AArch64::LDURSHWi:
4794 case AArch64::LDAPURHi:
4795 case AArch64::LDAPURSHWi:
4796 case AArch64::LDAPURSHXi:
4797 case AArch64::STURHi:
4798 case AArch64::STURHHi:
4799 case AArch64::STLURHi:
4800 Scale = TypeSize::getFixed(1);
4801 Width = TypeSize::getFixed(2);
4802 MinOffset = -256;
4803 MaxOffset = 255;
4804 break;
4805 case AArch64::LDURBi:
4806 case AArch64::LDURBBi:
4807 case AArch64::LDURSBXi:
4808 case AArch64::LDURSBWi:
4809 case AArch64::LDAPURBi:
4810 case AArch64::LDAPURSBWi:
4811 case AArch64::LDAPURSBXi:
4812 case AArch64::STURBi:
4813 case AArch64::STURBBi:
4814 case AArch64::STLURBi:
4815 Scale = Width = TypeSize::getFixed(1);
4816 MinOffset = -256;
4817 MaxOffset = 255;
4818 break;
4819 // LDP / STP (including pre/post inc)
4820 case AArch64::LDPQi:
4821 case AArch64::LDNPQi:
4822 case AArch64::STPQi:
4823 case AArch64::STNPQi:
4824 case AArch64::LDPQpost:
4825 case AArch64::LDPQpre:
4826 case AArch64::STPQpost:
4827 case AArch64::STPQpre:
4828 Scale = TypeSize::getFixed(16);
4829 Width = TypeSize::getFixed(16 * 2);
4830 MinOffset = -64;
4831 MaxOffset = 63;
4832 break;
4833 case AArch64::LDPXi:
4834 case AArch64::LDPDi:
4835 case AArch64::LDNPXi:
4836 case AArch64::LDNPDi:
4837 case AArch64::STPXi:
4838 case AArch64::STPDi:
4839 case AArch64::STNPXi:
4840 case AArch64::STNPDi:
4841 case AArch64::LDPDpost:
4842 case AArch64::LDPDpre:
4843 case AArch64::LDPXpost:
4844 case AArch64::LDPXpre:
4845 case AArch64::STPDpost:
4846 case AArch64::STPDpre:
4847 case AArch64::STPXpost:
4848 case AArch64::STPXpre:
4849 Scale = TypeSize::getFixed(8);
4850 Width = TypeSize::getFixed(8 * 2);
4851 MinOffset = -64;
4852 MaxOffset = 63;
4853 break;
4854 case AArch64::LDPWi:
4855 case AArch64::LDPSi:
4856 case AArch64::LDNPWi:
4857 case AArch64::LDNPSi:
4858 case AArch64::STPWi:
4859 case AArch64::STPSi:
4860 case AArch64::STNPWi:
4861 case AArch64::STNPSi:
4862 case AArch64::LDPSpost:
4863 case AArch64::LDPSpre:
4864 case AArch64::LDPWpost:
4865 case AArch64::LDPWpre:
4866 case AArch64::STPSpost:
4867 case AArch64::STPSpre:
4868 case AArch64::STPWpost:
4869 case AArch64::STPWpre:
4870 Scale = TypeSize::getFixed(4);
4871 Width = TypeSize::getFixed(4 * 2);
4872 MinOffset = -64;
4873 MaxOffset = 63;
4874 break;
4875 case AArch64::StoreSwiftAsyncContext:
4876 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4877 Scale = TypeSize::getFixed(1);
4878 Width = TypeSize::getFixed(8);
4879 MinOffset = 0;
4880 MaxOffset = 4095;
4881 break;
4882 case AArch64::ADDG:
4883 Scale = TypeSize::getFixed(16);
4884 Width = TypeSize::getFixed(0);
4885 MinOffset = 0;
4886 MaxOffset = 63;
4887 break;
4888 case AArch64::TAGPstack:
4889 Scale = TypeSize::getFixed(16);
4890 Width = TypeSize::getFixed(0);
4891 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4892 // of 63 (not 64!).
4893 MinOffset = -63;
4894 MaxOffset = 63;
4895 break;
4896 case AArch64::LDG:
4897 case AArch64::STGi:
4898 case AArch64::STGPreIndex:
4899 case AArch64::STGPostIndex:
4900 case AArch64::STZGi:
4901 case AArch64::STZGPreIndex:
4902 case AArch64::STZGPostIndex:
4903 Scale = Width = TypeSize::getFixed(16);
4904 MinOffset = -256;
4905 MaxOffset = 255;
4906 break;
4907 // SVE
4908 case AArch64::STR_ZZZZXI:
4909 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4910 case AArch64::LDR_ZZZZXI:
4911 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4912 Scale = TypeSize::getScalable(16);
4913 Width = TypeSize::getScalable(16 * 4);
4914 MinOffset = -256;
4915 MaxOffset = 252;
4916 break;
4917 case AArch64::STR_ZZZXI:
4918 case AArch64::LDR_ZZZXI:
4919 Scale = TypeSize::getScalable(16);
4920 Width = TypeSize::getScalable(16 * 3);
4921 MinOffset = -256;
4922 MaxOffset = 253;
4923 break;
4924 case AArch64::STR_ZZXI:
4925 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4926 case AArch64::LDR_ZZXI:
4927 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4928 Scale = TypeSize::getScalable(16);
4929 Width = TypeSize::getScalable(16 * 2);
4930 MinOffset = -256;
4931 MaxOffset = 254;
4932 break;
4933 case AArch64::LDR_PXI:
4934 case AArch64::STR_PXI:
4935 Scale = Width = TypeSize::getScalable(2);
4936 MinOffset = -256;
4937 MaxOffset = 255;
4938 break;
4939 case AArch64::LDR_PPXI:
4940 case AArch64::STR_PPXI:
4941 Scale = TypeSize::getScalable(2);
4942 Width = TypeSize::getScalable(2 * 2);
4943 MinOffset = -256;
4944 MaxOffset = 254;
4945 break;
4946 case AArch64::LDR_ZXI:
4947 case AArch64::STR_ZXI:
4948 Scale = Width = TypeSize::getScalable(16);
4949 MinOffset = -256;
4950 MaxOffset = 255;
4951 break;
4952 case AArch64::LD1B_IMM:
4953 case AArch64::LD1H_IMM:
4954 case AArch64::LD1W_IMM:
4955 case AArch64::LD1D_IMM:
4956 case AArch64::LDNT1B_ZRI:
4957 case AArch64::LDNT1H_ZRI:
4958 case AArch64::LDNT1W_ZRI:
4959 case AArch64::LDNT1D_ZRI:
4960 case AArch64::ST1B_IMM:
4961 case AArch64::ST1H_IMM:
4962 case AArch64::ST1W_IMM:
4963 case AArch64::ST1D_IMM:
4964 case AArch64::STNT1B_ZRI:
4965 case AArch64::STNT1H_ZRI:
4966 case AArch64::STNT1W_ZRI:
4967 case AArch64::STNT1D_ZRI:
4968 case AArch64::LDNF1B_IMM:
4969 case AArch64::LDNF1H_IMM:
4970 case AArch64::LDNF1W_IMM:
4971 case AArch64::LDNF1D_IMM:
4972 // A full vector's worth of data
4973 // Width = mbytes * elements
4974 Scale = Width = TypeSize::getScalable(16);
4975 MinOffset = -8;
4976 MaxOffset = 7;
4977 break;
4978 case AArch64::LD2B_IMM:
4979 case AArch64::LD2H_IMM:
4980 case AArch64::LD2W_IMM:
4981 case AArch64::LD2D_IMM:
4982 case AArch64::ST2B_IMM:
4983 case AArch64::ST2H_IMM:
4984 case AArch64::ST2W_IMM:
4985 case AArch64::ST2D_IMM:
4986 case AArch64::LD1B_2Z_IMM:
4987 case AArch64::LD1B_2Z_STRIDED_IMM:
4988 case AArch64::LD1H_2Z_IMM:
4989 case AArch64::LD1H_2Z_STRIDED_IMM:
4990 case AArch64::LD1W_2Z_IMM:
4991 case AArch64::LD1W_2Z_STRIDED_IMM:
4992 case AArch64::LD1D_2Z_IMM:
4993 case AArch64::LD1D_2Z_STRIDED_IMM:
4994 case AArch64::LD1B_2Z_IMM_PSEUDO:
4995 case AArch64::LD1H_2Z_IMM_PSEUDO:
4996 case AArch64::LD1W_2Z_IMM_PSEUDO:
4997 case AArch64::LD1D_2Z_IMM_PSEUDO:
4998 case AArch64::ST1B_2Z_IMM:
4999 case AArch64::ST1B_2Z_STRIDED_IMM:
5000 case AArch64::ST1H_2Z_IMM:
5001 case AArch64::ST1H_2Z_STRIDED_IMM:
5002 case AArch64::ST1W_2Z_IMM:
5003 case AArch64::ST1W_2Z_STRIDED_IMM:
5004 case AArch64::ST1D_2Z_IMM:
5005 case AArch64::ST1D_2Z_STRIDED_IMM:
5006 case AArch64::LDNT1B_2Z_IMM_PSEUDO:
5007 case AArch64::LDNT1B_2Z_IMM:
5008 case AArch64::LDNT1B_2Z_STRIDED_IMM:
5009 case AArch64::LDNT1H_2Z_IMM_PSEUDO:
5010 case AArch64::LDNT1H_2Z_IMM:
5011 case AArch64::LDNT1H_2Z_STRIDED_IMM:
5012 case AArch64::LDNT1W_2Z_IMM_PSEUDO:
5013 case AArch64::LDNT1W_2Z_IMM:
5014 case AArch64::LDNT1W_2Z_STRIDED_IMM:
5015 case AArch64::LDNT1D_2Z_IMM_PSEUDO:
5016 case AArch64::LDNT1D_2Z_IMM:
5017 case AArch64::LDNT1D_2Z_STRIDED_IMM:
5018 case AArch64::STNT1B_2Z_IMM:
5019 case AArch64::STNT1B_2Z_STRIDED_IMM:
5020 case AArch64::STNT1H_2Z_IMM:
5021 case AArch64::STNT1H_2Z_STRIDED_IMM:
5022 case AArch64::STNT1W_2Z_IMM:
5023 case AArch64::STNT1W_2Z_STRIDED_IMM:
5024 case AArch64::STNT1D_2Z_IMM:
5025 case AArch64::STNT1D_2Z_STRIDED_IMM:
5026 Scale = Width = TypeSize::getScalable(16 * 2);
5027 MinOffset = -8;
5028 MaxOffset = 7;
5029 break;
5030 case AArch64::LD3B_IMM:
5031 case AArch64::LD3H_IMM:
5032 case AArch64::LD3W_IMM:
5033 case AArch64::LD3D_IMM:
5034 case AArch64::ST3B_IMM:
5035 case AArch64::ST3H_IMM:
5036 case AArch64::ST3W_IMM:
5037 case AArch64::ST3D_IMM:
5038 Scale = Width = TypeSize::getScalable(16 * 3);
5039 MinOffset = -8;
5040 MaxOffset = 7;
5041 break;
5042 case AArch64::LD4B_IMM:
5043 case AArch64::LD4H_IMM:
5044 case AArch64::LD4W_IMM:
5045 case AArch64::LD4D_IMM:
5046 case AArch64::ST4B_IMM:
5047 case AArch64::ST4H_IMM:
5048 case AArch64::ST4W_IMM:
5049 case AArch64::ST4D_IMM:
5050 case AArch64::LD1B_4Z_IMM:
5051 case AArch64::LD1B_4Z_STRIDED_IMM:
5052 case AArch64::LD1H_4Z_IMM:
5053 case AArch64::LD1H_4Z_STRIDED_IMM:
5054 case AArch64::LD1W_4Z_IMM:
5055 case AArch64::LD1W_4Z_STRIDED_IMM:
5056 case AArch64::LD1D_4Z_IMM:
5057 case AArch64::LD1D_4Z_STRIDED_IMM:
5058 case AArch64::LD1B_4Z_IMM_PSEUDO:
5059 case AArch64::LD1H_4Z_IMM_PSEUDO:
5060 case AArch64::LD1W_4Z_IMM_PSEUDO:
5061 case AArch64::LD1D_4Z_IMM_PSEUDO:
5062 case AArch64::ST1B_4Z_IMM:
5063 case AArch64::ST1B_4Z_STRIDED_IMM:
5064 case AArch64::ST1H_4Z_IMM:
5065 case AArch64::ST1H_4Z_STRIDED_IMM:
5066 case AArch64::ST1W_4Z_IMM:
5067 case AArch64::ST1W_4Z_STRIDED_IMM:
5068 case AArch64::ST1D_4Z_IMM:
5069 case AArch64::ST1D_4Z_STRIDED_IMM:
5070 case AArch64::LDNT1B_4Z_IMM_PSEUDO:
5071 case AArch64::LDNT1B_4Z_IMM:
5072 case AArch64::LDNT1B_4Z_STRIDED_IMM:
5073 case AArch64::LDNT1H_4Z_IMM_PSEUDO:
5074 case AArch64::LDNT1H_4Z_IMM:
5075 case AArch64::LDNT1H_4Z_STRIDED_IMM:
5076 case AArch64::LDNT1W_4Z_IMM_PSEUDO:
5077 case AArch64::LDNT1W_4Z_IMM:
5078 case AArch64::LDNT1W_4Z_STRIDED_IMM:
5079 case AArch64::LDNT1D_4Z_IMM_PSEUDO:
5080 case AArch64::LDNT1D_4Z_IMM:
5081 case AArch64::LDNT1D_4Z_STRIDED_IMM:
5082 case AArch64::STNT1B_4Z_IMM:
5083 case AArch64::STNT1B_4Z_STRIDED_IMM:
5084 case AArch64::STNT1H_4Z_IMM:
5085 case AArch64::STNT1H_4Z_STRIDED_IMM:
5086 case AArch64::STNT1W_4Z_IMM:
5087 case AArch64::STNT1W_4Z_STRIDED_IMM:
5088 case AArch64::STNT1D_4Z_IMM:
5089 case AArch64::STNT1D_4Z_STRIDED_IMM:
5090 Scale = Width = TypeSize::getScalable(16 * 4);
5091 MinOffset = -8;
5092 MaxOffset = 7;
5093 break;
5094 case AArch64::LD1B_H_IMM:
5095 case AArch64::LD1SB_H_IMM:
5096 case AArch64::LD1H_S_IMM:
5097 case AArch64::LD1SH_S_IMM:
5098 case AArch64::LD1W_D_IMM:
5099 case AArch64::LD1SW_D_IMM:
5100 case AArch64::ST1B_H_IMM:
5101 case AArch64::ST1H_S_IMM:
5102 case AArch64::ST1W_D_IMM:
5103 case AArch64::LDNF1B_H_IMM:
5104 case AArch64::LDNF1SB_H_IMM:
5105 case AArch64::LDNF1H_S_IMM:
5106 case AArch64::LDNF1SH_S_IMM:
5107 case AArch64::LDNF1W_D_IMM:
5108 case AArch64::LDNF1SW_D_IMM:
5109 // A half vector worth of data
5110 // Width = mbytes * elements
5111 Scale = Width = TypeSize::getScalable(8);
5112 MinOffset = -8;
5113 MaxOffset = 7;
5114 break;
5115 case AArch64::LD1B_S_IMM:
5116 case AArch64::LD1SB_S_IMM:
5117 case AArch64::LD1H_D_IMM:
5118 case AArch64::LD1SH_D_IMM:
5119 case AArch64::ST1B_S_IMM:
5120 case AArch64::ST1H_D_IMM:
5121 case AArch64::LDNF1B_S_IMM:
5122 case AArch64::LDNF1SB_S_IMM:
5123 case AArch64::LDNF1H_D_IMM:
5124 case AArch64::LDNF1SH_D_IMM:
5125 // A quarter vector worth of data
5126 // Width = mbytes * elements
5127 Scale = Width = TypeSize::getScalable(4);
5128 MinOffset = -8;
5129 MaxOffset = 7;
5130 break;
5131 case AArch64::LD1B_D_IMM:
5132 case AArch64::LD1SB_D_IMM:
5133 case AArch64::ST1B_D_IMM:
5134 case AArch64::LDNF1B_D_IMM:
5135 case AArch64::LDNF1SB_D_IMM:
5136 // A eighth vector worth of data
5137 // Width = mbytes * elements
5138 Scale = Width = TypeSize::getScalable(2);
5139 MinOffset = -8;
5140 MaxOffset = 7;
5141 break;
5142 case AArch64::ST2Gi:
5143 case AArch64::ST2GPreIndex:
5144 case AArch64::ST2GPostIndex:
5145 case AArch64::STZ2Gi:
5146 case AArch64::STZ2GPreIndex:
5147 case AArch64::STZ2GPostIndex:
5148 Scale = TypeSize::getFixed(16);
5149 Width = TypeSize::getFixed(32);
5150 MinOffset = -256;
5151 MaxOffset = 255;
5152 break;
5153 case AArch64::STGPi:
5154 case AArch64::STGPpost:
5155 case AArch64::STGPpre:
5156 Scale = Width = TypeSize::getFixed(16);
5157 MinOffset = -64;
5158 MaxOffset = 63;
5159 break;
5160 case AArch64::LD1RB_IMM:
5161 case AArch64::LD1RB_H_IMM:
5162 case AArch64::LD1RB_S_IMM:
5163 case AArch64::LD1RB_D_IMM:
5164 case AArch64::LD1RSB_H_IMM:
5165 case AArch64::LD1RSB_S_IMM:
5166 case AArch64::LD1RSB_D_IMM:
5167 Scale = Width = TypeSize::getFixed(1);
5168 MinOffset = 0;
5169 MaxOffset = 63;
5170 break;
5171 case AArch64::LD1RH_IMM:
5172 case AArch64::LD1RH_S_IMM:
5173 case AArch64::LD1RH_D_IMM:
5174 case AArch64::LD1RSH_S_IMM:
5175 case AArch64::LD1RSH_D_IMM:
5176 Scale = Width = TypeSize::getFixed(2);
5177 MinOffset = 0;
5178 MaxOffset = 63;
5179 break;
5180 case AArch64::LD1RW_IMM:
5181 case AArch64::LD1RW_D_IMM:
5182 case AArch64::LD1RSW_IMM:
5183 Scale = Width = TypeSize::getFixed(4);
5184 MinOffset = 0;
5185 MaxOffset = 63;
5186 break;
5187 case AArch64::LD1RD_IMM:
5188 Scale = Width = TypeSize::getFixed(8);
5189 MinOffset = 0;
5190 MaxOffset = 63;
5191 break;
5192 }
5193
5194 return true;
5195}
5196
5197// Scaling factor for unscaled load or store.
// NOTE(review): the signature line is missing from this listing. The visible
// body maps the opcode held in Opc to a scale of 1, 2, 4, 8 or 16, and reaches
// llvm_unreachable for any opcode that has no entry in the switch.
5199 switch (Opc) {
5200 default:
5201 llvm_unreachable("Opcode has unknown scale!");
5202 case AArch64::LDRBui:
5203 case AArch64::LDRBBui:
5204 case AArch64::LDURBBi:
5205 case AArch64::LDRSBWui:
5206 case AArch64::LDURSBWi:
5207 case AArch64::STRBui:
5208 case AArch64::STRBBui:
5209 case AArch64::STURBBi:
5210 return 1;
5211 case AArch64::LDRHui:
5212 case AArch64::LDRHHui:
5213 case AArch64::LDURHHi:
5214 case AArch64::LDRSHWui:
5215 case AArch64::LDURSHWi:
5216 case AArch64::STRHui:
5217 case AArch64::STRHHui:
5218 case AArch64::STURHHi:
5219 return 2;
5220 case AArch64::LDRSui:
5221 case AArch64::LDURSi:
5222 case AArch64::LDRSpre:
5223 case AArch64::LDRSWui:
5224 case AArch64::LDURSWi:
5225 case AArch64::LDRSWpre:
5226 case AArch64::LDRWpre:
5227 case AArch64::LDRWui:
5228 case AArch64::LDURWi:
5229 case AArch64::STRSui:
5230 case AArch64::STURSi:
5231 case AArch64::STRSpre:
5232 case AArch64::STRWui:
5233 case AArch64::STURWi:
5234 case AArch64::STRWpre:
5235 case AArch64::LDPSi:
5236 case AArch64::LDPSWi:
5237 case AArch64::LDPWi:
5238 case AArch64::STPSi:
5239 case AArch64::STPWi:
5240 return 4;
5241 case AArch64::LDRDui:
5242 case AArch64::LDURDi:
5243 case AArch64::LDRDpre:
5244 case AArch64::LDRXui:
5245 case AArch64::LDURXi:
5246 case AArch64::LDRXpre:
5247 case AArch64::STRDui:
5248 case AArch64::STURDi:
5249 case AArch64::STRDpre:
5250 case AArch64::STRXui:
5251 case AArch64::STURXi:
5252 case AArch64::STRXpre:
5253 case AArch64::LDPDi:
5254 case AArch64::LDPXi:
5255 case AArch64::STPDi:
5256 case AArch64::STPXi:
5257 return 8;
// The 16 group also holds the ST*G* opcodes listed at the end.
5258 case AArch64::LDRQui:
5259 case AArch64::LDURQi:
5260 case AArch64::STRQui:
5261 case AArch64::STURQi:
5262 case AArch64::STRQpre:
5263 case AArch64::LDPQi:
5264 case AArch64::LDRQpre:
5265 case AArch64::STPQi:
5266 case AArch64::STGi:
5267 case AArch64::STZGi:
5268 case AArch64::ST2Gi:
5269 case AArch64::STZ2Gi:
5270 case AArch64::STGPi:
5271 return 16;
5272 }
5273}
5274
// Returns true only when MI's opcode is one of the LDR*pre opcodes listed
// below; every other opcode yields false.
// NOTE(review): the signature line is missing from this listing; presumably
// this is the isPreLd(MI) predicate called further down — confirm.
5276 switch (MI.getOpcode()) {
5277 default:
5278 return false;
5279 case AArch64::LDRWpre:
5280 case AArch64::LDRXpre:
5281 case AArch64::LDRSWpre:
5282 case AArch64::LDRSpre:
5283 case AArch64::LDRDpre:
5284 case AArch64::LDRQpre:
5285 return true;
5286 }
5287}
5288
// Returns true only when MI's opcode is one of the STR*pre opcodes listed
// below; every other opcode yields false.
// NOTE(review): the signature line is missing from this listing; presumably
// this is the isPreSt(MI) predicate called further down — confirm.
5290 switch (MI.getOpcode()) {
5291 default:
5292 return false;
5293 case AArch64::STRWpre:
5294 case AArch64::STRXpre:
5295 case AArch64::STRSpre:
5296 case AArch64::STRDpre:
5297 case AArch64::STRQpre:
5298 return true;
5299 }
5300}
5301
// True when MI satisfies either isPreLd or isPreSt.
// NOTE(review): the signature line is missing from this listing.
5303 return isPreLd(MI) || isPreSt(MI);
5304}
5305
// Returns true only for the LDUR*/LDR*ui/LDR*roX/LDR*roW opcodes with the BB,
// HH and W suffixes listed below; every other opcode yields false.
// NOTE(review): the signature line is missing from this listing; presumably
// this is the zero-extending-load predicate — confirm against the header.
5307 switch (MI.getOpcode()) {
5308 default:
5309 return false;
5310 case AArch64::LDURBBi:
5311 case AArch64::LDURHHi:
5312 case AArch64::LDURWi:
5313 case AArch64::LDRBBui:
5314 case AArch64::LDRHHui:
5315 case AArch64::LDRWui:
5316 case AArch64::LDRBBroX:
5317 case AArch64::LDRHHroX:
5318 case AArch64::LDRWroX:
5319 case AArch64::LDRBBroW:
5320 case AArch64::LDRHHroW:
5321 case AArch64::LDRWroW:
5322 return true;
5323 }
5324}
5325
// Returns true only for the LDURS*/LDRS* opcodes listed below (immediate,
// roX and roW addressing forms); every other opcode yields false.
// NOTE(review): the signature line is missing from this listing; presumably
// this is the sign-extending-load predicate — confirm against the header.
5327 switch (MI.getOpcode()) {
5328 default:
5329 return false;
5330 case AArch64::LDURSBWi:
5331 case AArch64::LDURSHWi:
5332 case AArch64::LDURSBXi:
5333 case AArch64::LDURSHXi:
5334 case AArch64::LDURSWi:
5335 case AArch64::LDRSBWui:
5336 case AArch64::LDRSHWui:
5337 case AArch64::LDRSBXui:
5338 case AArch64::LDRSHXui:
5339 case AArch64::LDRSWui:
5340 case AArch64::LDRSBWroX:
5341 case AArch64::LDRSHWroX:
5342 case AArch64::LDRSBXroX:
5343 case AArch64::LDRSHXroX:
5344 case AArch64::LDRSWroX:
5345 case AArch64::LDRSBWroW:
5346 case AArch64::LDRSHWroW:
5347 case AArch64::LDRSBXroW:
5348 case AArch64::LDRSHXroW:
5349 case AArch64::LDRSWroW:
5350 return true;
5351 }
5352}
5353
// Returns true only for the LDP*i / STP*i opcodes listed below, plus STGPi;
// every other opcode yields false.
// NOTE(review): the signature line is missing from this listing; presumably
// this is the paired load/store predicate — confirm against the header.
5355 switch (MI.getOpcode()) {
5356 default:
5357 return false;
5358 case AArch64::LDPSi:
5359 case AArch64::LDPSWi:
5360 case AArch64::LDPDi:
5361 case AArch64::LDPQi:
5362 case AArch64::LDPWi:
5363 case AArch64::LDPXi:
5364 case AArch64::STPSi:
5365 case AArch64::STPDi:
5366 case AArch64::STPQi:
5367 case AArch64::STPWi:
5368 case AArch64::STPXi:
5369 case AArch64::STGPi:
5370 return true;
5371 }
5372}
5373
// Returns the operand of a load/store instruction at a computed index.
// NOTE(review): the signature line and the condition that selects the index
// are missing from this listing; only the fallback index (1) is visible.
// Callers must pass an instruction for which mayLoadOrStore() holds.
5375 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
5376 unsigned Idx =
5378 : 1;
5379 return MI.getOperand(Idx);
5380}
5381
// Returns the operand of a load/store instruction at a computed index.
// NOTE(review): the line carrying the function name and the condition that
// selects the index are missing from this listing; only the fallback index
// (2) is visible. Callers must pass an instruction for which
// mayLoadOrStore() holds.
5382const MachineOperand &
5384 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
5385 unsigned Idx =
5387 : 2;
5388 return MI.getOperand(Idx);
5389}
5390
// Returns operand 4 of MI for the LDR*roX opcodes listed below. Any other
// opcode reaches llvm_unreachable, so callers must only pass those opcodes.
// NOTE(review): the line carrying the function name is missing from this
// listing.
5391const MachineOperand &
5393 switch (MI.getOpcode()) {
5394 default:
5395 llvm_unreachable("Unexpected opcode");
5396 case AArch64::LDRBroX:
5397 case AArch64::LDRBBroX:
5398 case AArch64::LDRSBXroX:
5399 case AArch64::LDRSBWroX:
5400 case AArch64::LDRHroX:
5401 case AArch64::LDRHHroX:
5402 case AArch64::LDRSHXroX:
5403 case AArch64::LDRSHWroX:
5404 case AArch64::LDRWroX:
5405 case AArch64::LDRSroX:
5406 case AArch64::LDRSWroX:
5407 case AArch64::LDRDroX:
5408 case AArch64::LDRXroX:
5409 case AArch64::LDRQroX:
5410 return MI.getOperand(4);
5411 }
5412}
5413
// Looks up the register class recorded for Reg in the MachineRegisterInfo of
// the function that contains MI. Returns nullptr when MI has no parent block,
// when that block has no parent function, or when getRegClassOrNull itself
// returns null.
// NOTE(review): the first line of the signature is missing from this listing.
5415 Register Reg) {
5416 if (MI.getParent() == nullptr)
5417 return nullptr;
5418 const MachineFunction *MF = MI.getParent()->getParent();
5419 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
5420}
5421
// Returns true if any operand of MI is a register in the FPR16 class: a
// physical register contained in FPR16RegClass, or a virtual register whose
// recorded class is exactly FPR16 or FPR16_lo.
// NOTE(review): the signature line is missing from this listing.
5423 auto IsHFPR = [&](const MachineOperand &Op) {
5424 if (!Op.isReg())
5425 return false;
5426 auto Reg = Op.getReg();
5427 if (Reg.isPhysical())
5428 return AArch64::FPR16RegClass.contains(Reg);
// Virtual register: compare against the class pointers directly. A null class
// (no parent block/function) simply fails both comparisons.
5429 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5430 return TRC == &AArch64::FPR16RegClass ||
5431 TRC == &AArch64::FPR16_loRegClass;
5432 };
5433 return llvm::any_of(MI.operands(), IsHFPR);
5434}
5435
// Returns true if any operand of MI is a register in the FPR128 class: a
// physical register contained in FPR128RegClass, or a virtual register whose
// recorded class is exactly FPR128 or FPR128_lo.
// NOTE(review): the signature line is missing from this listing.
5437 auto IsQFPR = [&](const MachineOperand &Op) {
5438 if (!Op.isReg())
5439 return false;
5440 auto Reg = Op.getReg();
5441 if (Reg.isPhysical())
5442 return AArch64::FPR128RegClass.contains(Reg);
// Virtual register: compare against the class pointers directly. A null class
// (no parent block/function) simply fails both comparisons.
5443 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5444 return TRC == &AArch64::FPR128RegClass ||
5445 TRC == &AArch64::FPR128_loRegClass;
5446 };
5447 return llvm::any_of(MI.operands(), IsQFPR);
5448}
5449
// Returns true for BRK, HLT, PACIASP, PACIBSP and PAUTH_PROLOGUE, and for
// HINT when its immediate (operand 0) is 25, 27, 32, 34, 36 or 38. All other
// instructions yield false.
// NOTE(review): the signature line is missing from this listing.
5451 switch (MI.getOpcode()) {
5452 case AArch64::BRK:
5453 case AArch64::HLT:
5454 case AArch64::PACIASP:
5455 case AArch64::PACIBSP:
5456 // Implicit BTI behavior.
5457 return true;
5458 case AArch64::PAUTH_PROLOGUE:
5459 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
5460 return true;
5461 case AArch64::HINT: {
5462 unsigned Imm = MI.getOperand(0).getImm();
5463 // Explicit BTI instruction.
5464 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
5465 return true;
5466 // PACI(A|B)SP instructions.
5467 if (Imm == 25 || Imm == 27)
5468 return true;
5469 return false;
5470 }
5471 default:
5472 return false;
5473 }
5474}
5475
// Register overload: returns true if the physical register Reg is contained
// in any of the FPR128, FPR64, FPR32, FPR16 or FPR8 register classes.
// Register 0 yields false; any other non-physical register trips the assert.
// NOTE(review): the signature line is missing from this listing.
5477 if (Reg == 0)
5478 return false;
5479 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
5480 return AArch64::FPR128RegClass.contains(Reg) ||
5481 AArch64::FPR64RegClass.contains(Reg) ||
5482 AArch64::FPR32RegClass.contains(Reg) ||
5483 AArch64::FPR16RegClass.contains(Reg) ||
5484 AArch64::FPR8RegClass.contains(Reg);
5485}
5486
// Instruction overload: returns true if any register operand of MI is an FP
// register. Physical registers are tested with isFpOrNEON(Reg); virtual
// registers are tested by comparing their recorded register class against the
// specific FPR classes listed below.
// NOTE(review): the signature line is missing from this listing.
5488 auto IsFPR = [&](const MachineOperand &Op) {
5489 if (!Op.isReg())
5490 return false;
5491 auto Reg = Op.getReg();
5492 if (Reg.isPhysical())
5493 return isFpOrNEON(Reg);
5494
// Pointer comparison only: classes that are not named here do not match.
5495 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5496 return TRC == &AArch64::FPR128RegClass ||
5497 TRC == &AArch64::FPR128_loRegClass ||
5498 TRC == &AArch64::FPR64RegClass ||
5499 TRC == &AArch64::FPR64_loRegClass ||
5500 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
5501 TRC == &AArch64::FPR8RegClass;
5502 };
5503 return llvm::any_of(MI.operands(), IsFPR);
5504}
5505
5506// Scale the unscaled offsets. Returns false if the unscaled offset can't be
5507// scaled.
// On success Offset is rewritten in place from a byte offset to an element
// offset (Offset / Scale); on failure Offset is left untouched.
5508static bool scaleOffset(unsigned Opc, int64_t &Offset) {
// NOTE(review): the statement that defines Scale is missing from this
// listing; presumably it is derived from Opc — confirm against the source.
5510
5511 // If the byte-offset isn't a multiple of the stride, we can't scale this
5512 // offset.
5513 if (Offset % Scale != 0)
5514 return false;
5515
5516 // Convert the byte-offset used by unscaled into an "element" offset used
5517 // by the scaled pair load/store instructions.
5518 Offset /= Scale;
5519 return true;
5520}
5521
5522static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
5523 if (FirstOpc == SecondOpc)
5524 return true;
5525 // We can also pair sign-ext and zero-ext instructions.
5526 switch (FirstOpc) {
5527 default:
5528 return false;
5529 case AArch64::STRSui:
5530 case AArch64::STURSi:
5531 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
5532 case AArch64::STRDui:
5533 case AArch64::STURDi:
5534 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
5535 case AArch64::STRQui:
5536 case AArch64::STURQi:
5537 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
5538 case AArch64::STRWui:
5539 case AArch64::STURWi:
5540 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
5541 case AArch64::STRXui:
5542 case AArch64::STURXi:
5543 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
5544 case AArch64::LDRSui:
5545 case AArch64::LDURSi:
5546 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
5547 case AArch64::LDRDui:
5548 case AArch64::LDURDi:
5549 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
5550 case AArch64::LDRQui:
5551 case AArch64::LDURQi:
5552 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
5553 case AArch64::LDRWui:
5554 case AArch64::LDURWi:
5555 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
5556 case AArch64::LDRSWui:
5557 case AArch64::LDURSWi:
5558 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
5559 case AArch64::LDRXui:
5560 case AArch64::LDURXi:
5561 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
5562 }
5563 // These instructions can't be paired based on their opcodes.
5564 return false;
5565}
5566
5567static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
5568 int64_t Offset1, unsigned Opcode1, int FI2,
5569 int64_t Offset2, unsigned Opcode2) {
5570 // Accesses through fixed stack object frame indices may access a different
5571 // fixed stack slot. Check that the object offsets + offsets match.
5572 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
5573 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
5574 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
5575 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
5576 // Convert to scaled object offsets.
5577 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
5578 if (ObjectOffset1 % Scale1 != 0)
5579 return false;
5580 ObjectOffset1 /= Scale1;
5581 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
5582 if (ObjectOffset2 % Scale2 != 0)
5583 return false;
5584 ObjectOffset2 /= Scale2;
5585 ObjectOffset1 += Offset1;
5586 ObjectOffset2 += Offset2;
5587 return ObjectOffset1 + 1 == ObjectOffset2;
5588 }
5589
5590 return FI1 == FI2;
5591}
5592
5593/// Detect opportunities for ldp/stp formation.
5594///
5595/// Only called for LdSt for which getMemOperandWithOffset returns true.
///
/// Returns true only if both instructions pass every check below and their
/// (scaled) immediate offsets are consecutive. In the visible body the
/// OpOffset1/OpOffset2, OffsetIsScalable1/OffsetIsScalable2 and NumBytes
/// parameters are not read; the offsets are taken from operand 2 of each
/// instruction instead.
/// NOTE(review): the first line of the signature is missing from this listing.
5597 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
5598 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
5599 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
5600 unsigned NumBytes) const {
5601 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
5602 const MachineOperand &BaseOp1 = *BaseOps1.front();
5603 const MachineOperand &BaseOp2 = *BaseOps2.front();
5604 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
5605 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
// Both bases must be the same kind of operand (register vs. frame index).
5606 if (BaseOp1.getType() != BaseOp2.getType())
5607 return false;
5608
5609 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
5610 "Only base registers and frame indices are supported.");
5611
5612 // Check for both base regs and base FI.
5613 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
5614 return false;
5615
5616 // Only cluster up to a single pair.
5617 if (ClusterSize > 2)
5618 return false;
5619
5620 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
5621 return false;
5622
5623 // Can we pair these instructions based on their opcodes?
5624 unsigned FirstOpc = FirstLdSt.getOpcode();
5625 unsigned SecondOpc = SecondLdSt.getOpcode();
5626 if (!canPairLdStOpc(FirstOpc, SecondOpc))
5627 return false;
5628
5629 // Can't merge volatiles or load/stores that have a hint to avoid pair
5630 // formation, for example.
5631 if (!isCandidateToMergeOrPair(FirstLdSt) ||
5632 !isCandidateToMergeOrPair(SecondLdSt))
5633 return false;
5634
5635 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
// Unscaled opcodes carry byte offsets; scaleOffset converts them to element
// units and fails when the byte offset is not a multiple of the scale.
5636 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
5637 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
5638 return false;
5639
5640 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
5641 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5642 return false;
5643
5644 // Pairwise instructions have a 7-bit signed offset field.
// Only the first offset is range-checked here.
5645 if (Offset1 > 63 || Offset1 < -64)
5646 return false;
5647
5648 // The caller should already have ordered First/SecondLdSt by offset.
5649 // Note: except for non-equal frame index bases
5650 if (BaseOp1.isFI()) {
5651 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5652 "Caller should have ordered offsets.");
5653
// Frame-index bases are decided by shouldClusterFI using the frame layout.
5654 const MachineFrameInfo &MFI =
5655 FirstLdSt.getParent()->getParent()->getFrameInfo();
5656 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5657 BaseOp2.getIndex(), Offset2, SecondOpc);
5658 }
5659
5660 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5661
// Register bases: cluster only when the second access is the next element.
5662 return Offset1 + 1 == Offset2;
5663}
5664
// Appends a register operand for (Reg, SubIdx) to MIB and returns the builder
// result. With SubIdx == 0 the register is added as-is. For a physical
// register the sub-register is resolved through TRI->getSubReg and added
// directly; otherwise Reg is added together with the sub-register index.
// NOTE(review): the first line of the signature is missing from this listing.
5666 MCRegister Reg, unsigned SubIdx,
5667 RegState State,
5668 const TargetRegisterInfo *TRI) {
5669 if (!SubIdx)
5670 return MIB.addReg(Reg, State);
5671
5672 if (Reg.isPhysical())
5673 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5674 return MIB.addReg(Reg, State, SubIdx);
5675}
5676
/// Returns true if copying a tuple of \p NumRegs consecutive registers in
/// ascending order would overwrite a source register before it has been read.
///
/// Register numbers wrap around modulo 32, so the distance from \p SrcReg to
/// \p DestReg is taken as the positive remainder mod 32, which a 5-bit mask
/// yields directly. A forward copy clobbers when that distance is smaller
/// than the tuple length.
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  const unsigned WrapMask = 0x1f;
  const unsigned Distance = (DestReg - SrcReg) & WrapMask;
  return Distance < NumRegs;
}
5683
// Copies a register tuple one sub-register at a time, emitting one Opcode
// instruction per entry of Indices, inserted before I in MBB. Each emitted
// instruction defines the destination sub-register and reads the source
// sub-register twice; the kill flag derived from KillSrc is placed on the
// second read only.
// NOTE(review): the first lines of the signature and the statement defining
// TRI are missing from this listing.
5686 const DebugLoc &DL, MCRegister DestReg,
5687 MCRegister SrcReg, bool KillSrc,
5688 unsigned Opcode,
5689 ArrayRef<unsigned> Indices) const {
5690 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5692 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5693 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5694 unsigned NumRegs = Indices.size();
5695
// Default to an ascending walk over the sub-registers. If that order would
// overwrite a source register before it is read, walk in descending order.
5696 int SubReg = 0, End = NumRegs, Incr = 1;
5697 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5698 SubReg = NumRegs - 1;
5699 End = -1;
5700 Incr = -1;
5701 }
5702
5703 for (; SubReg != End; SubReg += Incr) {
5704 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5705 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5706 AddSubReg(MIB, SrcReg, Indices[SubReg], {}, TRI);
5707 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5708 }
5709}
5710
// Copies a GPR register tuple one sub-register at a time, always in ascending
// order. For each entry of Indices it emits one Opcode instruction before I in
// MBB with operands: destination sub-register (def), ZeroReg, source
// sub-register (killed if KillSrc), and an immediate 0.
// NOTE(review): the first lines of the signature and the statement defining
// TRI are missing from this listing.
5713 const DebugLoc &DL, MCRegister DestReg,
5714 MCRegister SrcReg, bool KillSrc,
5715 unsigned Opcode, unsigned ZeroReg,
5716 llvm::ArrayRef<unsigned> Indices) const {
5718 unsigned NumRegs = Indices.size();
5719
// Debug-only sanity check: both encodings must be multiples of the tuple
// length, which the assert message ties to the sequences never overlapping.
5720#ifndef NDEBUG
5721 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5722 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5723 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5724 "GPR reg sequences should not be able to overlap");
5725#endif
5726
5727 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5728 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5729 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5730 MIB.addReg(ZeroReg);
5731 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5732 MIB.addImm(0);
5733 }
5734}
5735
5736/// Returns true if the instruction at I is in a streaming call site region,
5737/// within a single basic block.
5738/// A "call site streaming region" starts after smstart and ends at smstop
5739/// around a call to a streaming function. This walks backward from I.
///
/// Only instructions between the start of MBB and I are inspected. The nearest
/// preceding SM or SM+ZA state change decides the answer; if none is found in
/// this block, or the function has no streaming mode changes, the result is
/// false.
/// NOTE(review): the signature lines and the statement defining AFI are
/// missing from this listing.
5742 MachineFunction &MF = *MBB.getParent();
5744 if (!AFI->hasStreamingModeChanges())
5745 return false;
5746 // Walk backwards to find smstart/smstop
5747 for (MachineInstr &MI : reverse(make_range(MBB.begin(), I))) {
5748 unsigned Opc = MI.getOpcode();
5749 if (Opc == AArch64::MSRpstatesvcrImm1 || Opc == AArch64::MSRpstatePseudo) {
5750 // Check if this is SM change (not ZA)
5751 int64_t PState = MI.getOperand(0).getImm();
5752 if (PState == AArch64SVCR::SVCRSM || PState == AArch64SVCR::SVCRSMZA) {
5753 // Operand 1 is 1 for start, 0 for stop
5754 return MI.getOperand(1).getImm() == 1;
5755 }
5756 }
5757 }
5758 return false;
5759}
5760
5761/// Returns true if in a streaming call site region without SME-FA64.
/// The subtarget feature is tested first, so the backward scan performed by
/// isInStreamingCallSiteRegion is skipped when SME-FA64 is available.
/// NOTE(review): the parameter lines for MBB and I are missing from this
/// listing.
5762static bool mustAvoidNeonAtMBBI(const AArch64Subtarget &Subtarget,
5765 return !Subtarget.hasSMEFA64() && isInStreamingCallSiteRegion(MBB, I);
5766}
5767
5770 const DebugLoc &DL, Register DestReg,
5771 Register SrcReg, bool KillSrc,
5772 bool RenamableDest,
5773 bool RenamableSrc) const {
5774 ++NumCopyInstrs;
5775 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5776 AArch64::GPR32spRegClass.contains(SrcReg)) {
5777 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5778 // If either operand is WSP, expand to ADD #0.
5779 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5780 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5781 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5782 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5783 &AArch64::GPR64spRegClass);
5784 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5785 &AArch64::GPR64spRegClass);
5786 // This instruction is reading and writing X registers. This may upset
5787 // the register scavenger and machine verifier, so we need to indicate
5788 // that we are reading an undefined value from SrcRegX, but a proper
5789 // value from SrcReg.
5790 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5791 .addReg(SrcRegX, RegState::Undef)
5792 .addImm(0)
5794 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5795 ++NumZCRegMoveInstrsGPR;
5796 } else {
5797 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5798 .addReg(SrcReg, getKillRegState(KillSrc))
5799 .addImm(0)
5801 if (Subtarget.hasZeroCycleRegMoveGPR32())
5802 ++NumZCRegMoveInstrsGPR;
5803 }
5804 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5805 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5806 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5807 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5808 &AArch64::GPR64spRegClass);
5809 assert(DestRegX.isValid() && "Destination super-reg not valid");
5810 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5811 &AArch64::GPR64spRegClass);
5812 assert(SrcRegX.isValid() && "Source super-reg not valid");
5813 // This instruction is reading and writing X registers. This may upset
5814 // the register scavenger and machine verifier, so we need to indicate
5815 // that we are reading an undefined value from SrcRegX, but a proper
5816 // value from SrcReg.
5817 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5818 .addReg(AArch64::XZR)
5819 .addReg(SrcRegX, RegState::Undef)
5820 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5821 ++NumZCRegMoveInstrsGPR;
5822 } else {
5823 // Otherwise, expand to ORR WZR.
5824 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5825 .addReg(AArch64::WZR)
5826 .addReg(SrcReg, getKillRegState(KillSrc));
5827 if (Subtarget.hasZeroCycleRegMoveGPR32())
5828 ++NumZCRegMoveInstrsGPR;
5829 }
5830 return;
5831 }
5832
5833 // GPR32 zeroing
5834 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5835 if (Subtarget.hasZeroCycleZeroingGPR64() &&
5836 !Subtarget.hasZeroCycleZeroingGPR32()) {
5837 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5838 &AArch64::GPR64spRegClass);
5839 assert(DestRegX.isValid() && "Destination super-reg not valid");
5840 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
5841 .addImm(0)
5843 ++NumZCZeroingInstrsGPR;
5844 } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
5845 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5846 .addImm(0)
5848 ++NumZCZeroingInstrsGPR;
5849 } else {
5850 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5851 .addReg(AArch64::WZR)
5852 .addReg(AArch64::WZR);
5853 }
5854 return;
5855 }
5856
5857 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5858 AArch64::GPR64spRegClass.contains(SrcReg)) {
5859 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5860 // If either operand is SP, expand to ADD #0.
5861 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5862 .addReg(SrcReg, getKillRegState(KillSrc))
5863 .addImm(0)
5865 if (Subtarget.hasZeroCycleRegMoveGPR64())
5866 ++NumZCRegMoveInstrsGPR;
5867 } else {
5868 // Otherwise, expand to ORR XZR.
5869 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5870 .addReg(AArch64::XZR)
5871 .addReg(SrcReg, getKillRegState(KillSrc));
5872 if (Subtarget.hasZeroCycleRegMoveGPR64())
5873 ++NumZCRegMoveInstrsGPR;
5874 }
5875 return;
5876 }
5877
5878 // GPR64 zeroing
5879 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5880 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5881 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5882 .addImm(0)
5884 ++NumZCZeroingInstrsGPR;
5885 } else {
5886 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5887 .addReg(AArch64::XZR)
5888 .addReg(AArch64::XZR);
5889 }
5890 return;
5891 }
5892
5893 // Copy a Predicate register by ORRing with itself.
5894 if (AArch64::PPRRegClass.contains(DestReg) &&
5895 AArch64::PPRRegClass.contains(SrcReg)) {
5896 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5897 "Unexpected SVE register.");
5898 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5899 .addReg(SrcReg) // Pg
5900 .addReg(SrcReg)
5901 .addReg(SrcReg, getKillRegState(KillSrc));
5902 return;
5903 }
5904
5905 // Copy a predicate-as-counter register by ORRing with itself as if it
5906 // were a regular predicate (mask) register.
5907 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5908 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5909 if (DestIsPNR || SrcIsPNR) {
5910 auto ToPPR = [](MCRegister R) -> MCRegister {
5911 return (R - AArch64::PN0) + AArch64::P0;
5912 };
5913 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5914 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5915
5916 if (PPRSrcReg != PPRDestReg) {
5917 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5918 .addReg(PPRSrcReg) // Pg
5919 .addReg(PPRSrcReg)
5920 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5921 if (DestIsPNR)
5922 NewMI.addDef(DestReg, RegState::Implicit);
5923 }
5924 return;
5925 }
5926
5927 // Copy a Z register by ORRing with itself.
5928 if (AArch64::ZPRRegClass.contains(DestReg) &&
5929 AArch64::ZPRRegClass.contains(SrcReg)) {
5930 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5931 "Unexpected SVE register.");
5932 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5933 .addReg(SrcReg)
5934 .addReg(SrcReg, getKillRegState(KillSrc));
5935 return;
5936 }
5937
5938 // Copy a Z register pair by copying the individual sub-registers.
5939 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5940 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5941 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5942 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5943 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5944 "Unexpected SVE register.");
5945 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5946 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5947 Indices);
5948 return;
5949 }
5950
5951 // Copy a Z register triple by copying the individual sub-registers.
5952 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5953 AArch64::ZPR3RegClass.contains(SrcReg)) {
5954 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5955 "Unexpected SVE register.");
5956 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5957 AArch64::zsub2};
5958 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5959 Indices);
5960 return;
5961 }
5962
5963 // Copy a Z register quad by copying the individual sub-registers.
5964 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5965 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5966 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5967 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5968 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5969 "Unexpected SVE register.");
5970 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5971 AArch64::zsub2, AArch64::zsub3};
5972 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5973 Indices);
5974 return;
5975 }
5976
5977 // Copy a DDDD register quad by copying the individual sub-registers.
5978 if (AArch64::DDDDRegClass.contains(DestReg) &&
5979 AArch64::DDDDRegClass.contains(SrcReg)) {
5980 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5981 AArch64::dsub2, AArch64::dsub3};
5982 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5983 Indices);
5984 return;
5985 }
5986
5987 // Copy a DDD register triple by copying the individual sub-registers.
5988 if (AArch64::DDDRegClass.contains(DestReg) &&
5989 AArch64::DDDRegClass.contains(SrcReg)) {
5990 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5991 AArch64::dsub2};
5992 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5993 Indices);
5994 return;
5995 }
5996
5997 // Copy a DD register pair by copying the individual sub-registers.
5998 if (AArch64::DDRegClass.contains(DestReg) &&
5999 AArch64::DDRegClass.contains(SrcReg)) {
6000 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
6001 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
6002 Indices);
6003 return;
6004 }
6005
6006 // Copy a QQQQ register quad by copying the individual sub-registers.
6007 if (AArch64::QQQQRegClass.contains(DestReg) &&
6008 AArch64::QQQQRegClass.contains(SrcReg)) {
6009 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
6010 AArch64::qsub2, AArch64::qsub3};
6011 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
6012 Indices);
6013 return;
6014 }
6015
6016 // Copy a QQQ register triple by copying the individual sub-registers.
6017 if (AArch64::QQQRegClass.contains(DestReg) &&
6018 AArch64::QQQRegClass.contains(SrcReg)) {
6019 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
6020 AArch64::qsub2};
6021 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
6022 Indices);
6023 return;
6024 }
6025
6026 // Copy a QQ register pair by copying the individual sub-registers.
6027 if (AArch64::QQRegClass.contains(DestReg) &&
6028 AArch64::QQRegClass.contains(SrcReg)) {
6029 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
6030 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
6031 Indices);
6032 return;
6033 }
6034
6035 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
6036 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
6037 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
6038 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
6039 AArch64::XZR, Indices);
6040 return;
6041 }
6042
6043 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
6044 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
6045 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
6046 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
6047 AArch64::WZR, Indices);
6048 return;
6049 }
6050
6051 if (AArch64::FPR128RegClass.contains(DestReg) &&
6052 AArch64::FPR128RegClass.contains(SrcReg)) {
6053 // In streaming regions, NEON is illegal but streaming-SVE is available.
6054 // Use SVE for copies if we're in a streaming region and SME is available.
6055 // With +sme-fa64, NEON is legal in streaming mode so we can use it.
6056 if ((Subtarget.isSVEorStreamingSVEAvailable() &&
6057 !Subtarget.isNeonAvailable()) ||
6058 mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
6059 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
6060 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
6061 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
6062 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
6063 } else if (Subtarget.isNeonAvailable()) {
6064 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
6065 .addReg(SrcReg)
6066 .addReg(SrcReg, getKillRegState(KillSrc));
6067 if (Subtarget.hasZeroCycleRegMoveFPR128())
6068 ++NumZCRegMoveInstrsFPR;
6069 } else {
6070 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
6071 .addReg(AArch64::SP, RegState::Define)
6072 .addReg(SrcReg, getKillRegState(KillSrc))
6073 .addReg(AArch64::SP)
6074 .addImm(-16);
6075 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
6076 .addReg(AArch64::SP, RegState::Define)
6077 .addReg(DestReg, RegState::Define)
6078 .addReg(AArch64::SP)
6079 .addImm(16);
6080 }
6081 return;
6082 }
6083
6084 if (AArch64::FPR64RegClass.contains(DestReg) &&
6085 AArch64::FPR64RegClass.contains(SrcReg)) {
6086 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
6087 !Subtarget.hasZeroCycleRegMoveFPR64() &&
6088 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
6089 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
6090 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
6091 &AArch64::FPR128RegClass);
6092 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
6093 &AArch64::FPR128RegClass);
6094 // This instruction is reading and writing Q registers. This may upset
6095 // the register scavenger and machine verifier, so we need to indicate
6096 // that we are reading an undefined value from SrcRegQ, but a proper
6097 // value from SrcReg.
6098 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
6099 .addReg(SrcRegQ, RegState::Undef)
6100 .addReg(SrcRegQ, RegState::Undef)
6101 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6102 ++NumZCRegMoveInstrsFPR;
6103 } else {
6104 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
6105 .addReg(SrcReg, getKillRegState(KillSrc));
6106 if (Subtarget.hasZeroCycleRegMoveFPR64())
6107 ++NumZCRegMoveInstrsFPR;
6108 }
6109 return;
6110 }
6111
6112 if (AArch64::FPR32RegClass.contains(DestReg) &&
6113 AArch64::FPR32RegClass.contains(SrcReg)) {
6114 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
6115 !Subtarget.hasZeroCycleRegMoveFPR64() &&
6116 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
6117 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
6118 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
6119 &AArch64::FPR128RegClass);
6120 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
6121 &AArch64::FPR128RegClass);
6122 // This instruction is reading and writing Q registers. This may upset
6123 // the register scavenger and machine verifier, so we need to indicate
6124 // that we are reading an undefined value from SrcRegQ, but a proper
6125 // value from SrcReg.
6126 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
6127 .addReg(SrcRegQ, RegState::Undef)
6128 .addReg(SrcRegQ, RegState::Undef)
6129 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6130 ++NumZCRegMoveInstrsFPR;
6131 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
6132 !Subtarget.hasZeroCycleRegMoveFPR32()) {
6133 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
6134 &AArch64::FPR64RegClass);
6135 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
6136 &AArch64::FPR64RegClass);
6137 // This instruction is reading and writing D registers. This may upset
6138 // the register scavenger and machine verifier, so we need to indicate
6139 // that we are reading an undefined value from SrcRegD, but a proper
6140 // value from SrcReg.
6141 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
6142 .addReg(SrcRegD, RegState::Undef)
6143 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6144 ++NumZCRegMoveInstrsFPR;
6145 } else {
6146 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
6147 .addReg(SrcReg, getKillRegState(KillSrc));
6148 if (Subtarget.hasZeroCycleRegMoveFPR32())
6149 ++NumZCRegMoveInstrsFPR;
6150 }
6151 return;
6152 }
6153
6154 if (AArch64::FPR16RegClass.contains(DestReg) &&
6155 AArch64::FPR16RegClass.contains(SrcReg)) {
6156 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
6157 !Subtarget.hasZeroCycleRegMoveFPR64() &&
6158 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
6159 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
6160 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
6161 &AArch64::FPR128RegClass);
6162 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
6163 &AArch64::FPR128RegClass);
6164 // This instruction is reading and writing Q registers. This may upset
6165 // the register scavenger and machine verifier, so we need to indicate
6166 // that we are reading an undefined value from SrcRegQ, but a proper
6167 // value from SrcReg.
6168 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
6169 .addReg(SrcRegQ, RegState::Undef)
6170 .addReg(SrcRegQ, RegState::Undef)
6171 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6172 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
6173 !Subtarget.hasZeroCycleRegMoveFPR32()) {
6174 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
6175 &AArch64::FPR64RegClass);
6176 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
6177 &AArch64::FPR64RegClass);
6178 // This instruction is reading and writing D registers. This may upset
6179 // the register scavenger and machine verifier, so we need to indicate
6180 // that we are reading an undefined value from SrcRegD, but a proper
6181 // value from SrcReg.
6182 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
6183 .addReg(SrcRegD, RegState::Undef)
6184 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6185 } else {
6186 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
6187 &AArch64::FPR32RegClass);
6188 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
6189 &AArch64::FPR32RegClass);
6190 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
6191 .addReg(SrcReg, getKillRegState(KillSrc));
6192 }
6193 return;
6194 }
6195
6196 if (AArch64::FPR8RegClass.contains(DestReg) &&
6197 AArch64::FPR8RegClass.contains(SrcReg)) {
6198 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
6199 !Subtarget.hasZeroCycleRegMoveFPR64() &&
6200 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
6201 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
6202 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
6203 &AArch64::FPR128RegClass);
6204 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
6205 &AArch64::FPR128RegClass);
6206 // This instruction is reading and writing Q registers. This may upset
6207 // the register scavenger and machine verifier, so we need to indicate
6208 // that we are reading an undefined value from SrcRegQ, but a proper
6209 // value from SrcReg.
6210 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
6211 .addReg(SrcRegQ, RegState::Undef)
6212 .addReg(SrcRegQ, RegState::Undef)
6213 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6214 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
6215 !Subtarget.hasZeroCycleRegMoveFPR32()) {
6216 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
6217 &AArch64::FPR64RegClass);
6218 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
6219 &AArch64::FPR64RegClass);
6220 // This instruction is reading and writing D registers. This may upset
6221 // the register scavenger and machine verifier, so we need to indicate
6222 // that we are reading an undefined value from SrcRegD, but a proper
6223 // value from SrcReg.
6224 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
6225 .addReg(SrcRegD, RegState::Undef)
6226 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6227 } else {
6228 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
6229 &AArch64::FPR32RegClass);
6230 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
6231 &AArch64::FPR32RegClass);
6232 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
6233 .addReg(SrcReg, getKillRegState(KillSrc));
6234 }
6235 return;
6236 }
6237
6238 // Copies between GPR64 and FPR64.
6239 if (AArch64::FPR64RegClass.contains(DestReg) &&
6240 AArch64::GPR64RegClass.contains(SrcReg)) {
6241 if (AArch64::XZR == SrcReg) {
6242 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
6243 } else {
6244 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
6245 .addReg(SrcReg, getKillRegState(KillSrc));
6246 }
6247 return;
6248 }
6249 if (AArch64::GPR64RegClass.contains(DestReg) &&
6250 AArch64::FPR64RegClass.contains(SrcReg)) {
6251 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
6252 .addReg(SrcReg, getKillRegState(KillSrc));
6253 return;
6254 }
6255 // Copies between GPR32 and FPR32.
6256 if (AArch64::FPR32RegClass.contains(DestReg) &&
6257 AArch64::GPR32RegClass.contains(SrcReg)) {
6258 if (AArch64::WZR == SrcReg) {
6259 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
6260 } else {
6261 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
6262 .addReg(SrcReg, getKillRegState(KillSrc));
6263 }
6264 return;
6265 }
6266 if (AArch64::GPR32RegClass.contains(DestReg) &&
6267 AArch64::FPR32RegClass.contains(SrcReg)) {
6268 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
6269 .addReg(SrcReg, getKillRegState(KillSrc));
6270 return;
6271 }
6272
6273 if (DestReg == AArch64::NZCV) {
6274 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
6275 BuildMI(MBB, I, DL, get(AArch64::MSR))
6276 .addImm(AArch64SysReg::NZCV)
6277 .addReg(SrcReg, getKillRegState(KillSrc))
6278 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
6279 return;
6280 }
6281
6282 if (SrcReg == AArch64::NZCV) {
6283 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
6284 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
6285 .addImm(AArch64SysReg::NZCV)
6286 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
6287 return;
6288 }
6289
6290#ifndef NDEBUG
6291 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
6292 << "\n";
6293#endif
6294 llvm_unreachable("unimplemented reg-to-reg copy");
6295}
6296
6299 MachineBasicBlock::iterator InsertBefore,
6300 const MCInstrDesc &MCID,
6301 Register SrcReg, bool IsKill,
6302 unsigned SubIdx0, unsigned SubIdx1, int FI,
6303 MachineMemOperand *MMO) {
6304 Register SrcReg0 = SrcReg;
6305 Register SrcReg1 = SrcReg;
6306 if (SrcReg.isPhysical()) {
6307 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
6308 SubIdx0 = 0;
6309 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
6310 SubIdx1 = 0;
6311 }
6312 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
6313 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
6314 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
6315 .addFrameIndex(FI)
6316 .addImm(0)
6317 .addMemOperand(MMO);
6318}
6319
6322 Register SrcReg, bool isKill, int FI,
6323 const TargetRegisterClass *RC,
6324 Register VReg,
6325 MachineInstr::MIFlag Flags) const {
6326 MachineFunction &MF = *MBB.getParent();
6327 MachineFrameInfo &MFI = MF.getFrameInfo();
6328
6330 MachineMemOperand *MMO =
6332 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6333 unsigned Opc = 0;
6334 bool Offset = true;
6336 unsigned StackID = TargetStackID::Default;
6337 switch (RI.getSpillSize(*RC)) {
6338 case 1:
6339 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6340 Opc = AArch64::STRBui;
6341 break;
6342 case 2: {
6343 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6344 Opc = AArch64::STRHui;
6345 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
6346 AArch64::PPRRegClass.hasSubClassEq(RC)) {
6347 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6348 "Unexpected register store without SVE store instructions");
6349 Opc = AArch64::STR_PXI;
6351 }
6352 break;
6353 }
6354 case 4:
6355 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6356 Opc = AArch64::STRWui;
6357 if (SrcReg.isVirtual())
6358 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
6359 else
6360 assert(SrcReg != AArch64::WSP);
6361 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6362 Opc = AArch64::STRSui;
6363 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6364 Opc = AArch64::STR_PPXI;
6366 }
6367 break;
6368 case 8:
6369 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6370 Opc = AArch64::STRXui;
6371 if (SrcReg.isVirtual())
6372 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6373 else
6374 assert(SrcReg != AArch64::SP);
6375 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6376 Opc = AArch64::STRDui;
6377 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6379 get(AArch64::STPWi), SrcReg, isKill,
6380 AArch64::sube32, AArch64::subo32, FI, MMO);
6381 return;
6382 }
6383 break;
6384 case 16:
6385 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6386 Opc = AArch64::STRQui;
6387 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6388 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6389 Opc = AArch64::ST1Twov1d;
6390 Offset = false;
6391 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6393 get(AArch64::STPXi), SrcReg, isKill,
6394 AArch64::sube64, AArch64::subo64, FI, MMO);
6395 return;
6396 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6397 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6398 "Unexpected register store without SVE store instructions");
6399 Opc = AArch64::STR_ZXI;
6401 }
6402 break;
6403 case 24:
6404 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6405 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6406 Opc = AArch64::ST1Threev1d;
6407 Offset = false;
6408 }
6409 break;
6410 case 32:
6411 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6412 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6413 Opc = AArch64::ST1Fourv1d;
6414 Offset = false;
6415 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6416 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6417 Opc = AArch64::ST1Twov2d;
6418 Offset = false;
6419 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6420 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6421 "Unexpected register store without SVE store instructions");
6422 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
6424 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6425 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6426 "Unexpected register store without SVE store instructions");
6427 Opc = AArch64::STR_ZZXI;
6429 }
6430 break;
6431 case 48:
6432 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6433 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6434 Opc = AArch64::ST1Threev2d;
6435 Offset = false;
6436 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6437 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6438 "Unexpected register store without SVE store instructions");
6439 Opc = AArch64::STR_ZZZXI;
6441 }
6442 break;
6443 case 64:
6444 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6445 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6446 Opc = AArch64::ST1Fourv2d;
6447 Offset = false;
6448 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6449 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6450 "Unexpected register store without SVE store instructions");
6451 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
6453 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6454 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6455 "Unexpected register store without SVE store instructions");
6456 Opc = AArch64::STR_ZZZZXI;
6458 }
6459 break;
6460 }
6461 assert(Opc && "Unknown register class");
6462 MFI.setStackID(FI, StackID);
6463
6465 .addReg(SrcReg, getKillRegState(isKill))
6466 .addFrameIndex(FI);
6467
6468 if (Offset)
6469 MI.addImm(0);
6470 if (PNRReg.isValid())
6471 MI.addDef(PNRReg, RegState::Implicit);
6472 MI.addMemOperand(MMO);
6473}
6474
6477 MachineBasicBlock::iterator InsertBefore,
6478 const MCInstrDesc &MCID,
6479 Register DestReg, unsigned SubIdx0,
6480 unsigned SubIdx1, int FI,
6481 MachineMemOperand *MMO) {
6482 Register DestReg0 = DestReg;
6483 Register DestReg1 = DestReg;
6484 bool IsUndef = true;
6485 if (DestReg.isPhysical()) {
6486 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
6487 SubIdx0 = 0;
6488 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
6489 SubIdx1 = 0;
6490 IsUndef = false;
6491 }
6492 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
6493 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
6494 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
6495 .addFrameIndex(FI)
6496 .addImm(0)
6497 .addMemOperand(MMO);
6498}
6499
6502 Register DestReg, int FI,
6503 const TargetRegisterClass *RC,
6504 Register VReg, unsigned SubReg,
6505 MachineInstr::MIFlag Flags) const {
6506 MachineFunction &MF = *MBB.getParent();
6507 MachineFrameInfo &MFI = MF.getFrameInfo();
6509 MachineMemOperand *MMO =
6511 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6512
6513 unsigned Opc = 0;
6514 bool Offset = true;
6515 unsigned StackID = TargetStackID::Default;
6517 switch (TRI.getSpillSize(*RC)) {
6518 case 1:
6519 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6520 Opc = AArch64::LDRBui;
6521 break;
6522 case 2: {
6523 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
6524 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6525 Opc = AArch64::LDRHui;
6526 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
6527 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6528 "Unexpected register load without SVE load instructions");
6529 if (IsPNR)
6530 PNRReg = DestReg;
6531 Opc = AArch64::LDR_PXI;
6533 }
6534 break;
6535 }
6536 case 4:
6537 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6538 Opc = AArch64::LDRWui;
6539 if (DestReg.isVirtual())
6540 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
6541 else
6542 assert(DestReg != AArch64::WSP);
6543 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6544 Opc = AArch64::LDRSui;
6545 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6546 Opc = AArch64::LDR_PPXI;
6548 }
6549 break;
6550 case 8:
6551 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6552 Opc = AArch64::LDRXui;
6553 if (DestReg.isVirtual())
6554 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
6555 else
6556 assert(DestReg != AArch64::SP);
6557 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6558 Opc = AArch64::LDRDui;
6559 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6561 get(AArch64::LDPWi), DestReg, AArch64::sube32,
6562 AArch64::subo32, FI, MMO);
6563 return;
6564 }
6565 break;
6566 case 16:
6567 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6568 Opc = AArch64::LDRQui;
6569 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6570 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6571 Opc = AArch64::LD1Twov1d;
6572 Offset = false;
6573 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6575 get(AArch64::LDPXi), DestReg, AArch64::sube64,
6576 AArch64::subo64, FI, MMO);
6577 return;
6578 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6579 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6580 "Unexpected register load without SVE load instructions");
6581 Opc = AArch64::LDR_ZXI;
6583 }
6584 break;
6585 case 24:
6586 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6587 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6588 Opc = AArch64::LD1Threev1d;
6589 Offset = false;
6590 }
6591 break;
6592 case 32:
6593 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6594 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6595 Opc = AArch64::LD1Fourv1d;
6596 Offset = false;
6597 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6598 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6599 Opc = AArch64::LD1Twov2d;
6600 Offset = false;
6601 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6602 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6603 "Unexpected register load without SVE load instructions");
6604 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
6606 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6607 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6608 "Unexpected register load without SVE load instructions");
6609 Opc = AArch64::LDR_ZZXI;
6611 }
6612 break;
6613 case 48:
6614 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6615 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6616 Opc = AArch64::LD1Threev2d;
6617 Offset = false;
6618 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6619 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6620 "Unexpected register load without SVE load instructions");
6621 Opc = AArch64::LDR_ZZZXI;
6623 }
6624 break;
6625 case 64:
6626 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6627 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6628 Opc = AArch64::LD1Fourv2d;
6629 Offset = false;
6630 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6631 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6632 "Unexpected register load without SVE load instructions");
6633 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
6635 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6636 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6637 "Unexpected register load without SVE load instructions");
6638 Opc = AArch64::LDR_ZZZZXI;
6640 }
6641 break;
6642 }
6643
6644 assert(Opc && "Unknown register class");
6645 MFI.setStackID(FI, StackID);
6646
6648 .addReg(DestReg, getDefRegState(true))
6649 .addFrameIndex(FI);
6650 if (Offset)
6651 MI.addImm(0);
6652 if (PNRReg.isValid() && !PNRReg.isVirtual())
6653 MI.addDef(PNRReg, RegState::Implicit);
6654 MI.addMemOperand(MMO);
6655}
6656
6658 const MachineInstr &UseMI,
6659 const TargetRegisterInfo *TRI) {
6660 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
6661 UseMI.getIterator()),
6662 [TRI](const MachineInstr &I) {
6663 return I.modifiesRegister(AArch64::NZCV, TRI) ||
6664 I.readsRegister(AArch64::NZCV, TRI);
6665 });
6666}
6667
6668void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6669 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
6670 // The smallest scalable element supported by scaled SVE addressing
6671 // modes are predicates, which are 2 scalable bytes in size. So the scalable
6672 // byte offset must always be a multiple of 2.
6673 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6674
6675 // VGSized offsets are divided by '2', because the VG register is the
6676 // the number of 64bit granules as opposed to 128bit vector chunks,
6677 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
6678 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
6679 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
6680 ByteSized = Offset.getFixed();
6681 VGSized = Offset.getScalable() / 2;
6682}
6683
6684/// Returns the offset in parts to which this frame offset can be
6685/// decomposed for the purpose of describing a frame offset.
6686/// For non-scalable offsets this is simply its byte size.
6687void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6688 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
6689 int64_t &NumDataVectors) {
6690 // The smallest scalable element supported by scaled SVE addressing
6691 // modes are predicates, which are 2 scalable bytes in size. So the scalable
6692 // byte offset must always be a multiple of 2.
6693 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6694
6695 NumBytes = Offset.getFixed();
6696 NumDataVectors = 0;
6697 NumPredicateVectors = Offset.getScalable() / 2;
6698 // This method is used to get the offsets to adjust the frame offset.
6699 // If the function requires ADDPL to be used and needs more than two ADDPL
6700 // instructions, part of the offset is folded into NumDataVectors so that it
6701 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
6702 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
6703 NumPredicateVectors > 62) {
6704 NumDataVectors = NumPredicateVectors / 8;
6705 NumPredicateVectors -= NumDataVectors * 8;
6706 }
6707}
6708
6709// Convenience function to create a DWARF expression for: Constant `Operation`.
6710// This helper emits compact sequences for common cases. For example, for `-15
6711// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
6714 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6715 // -Constant (1 to 31)
6716 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6717 Operation = dwarf::DW_OP_minus;
6718 } else if (Constant >= 0 && Constant <= 31) {
6719 // Literal value 0 to 31
6720 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6721 } else {
6722 // Signed constant
6723 Expr.push_back(dwarf::DW_OP_consts);
6725 }
6726 return Expr.push_back(Operation);
6727}
6728
6729// Convenience function to create a DWARF expression for a register.
// Appends a DW_OP_bregx opcode to Expr and ends the sequence with a single
// 0 byte.
// NOTE(review): the trailing 0 is presumably the zero offset operand of
// DW_OP_bregx, with RegNum encoded between the two pushes -- confirm.
6730static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6731 Expr.push_back((char)dwarf::DW_OP_bregx);
6733 Expr.push_back(0);
6734}
6735
6736// Convenience function to create a DWARF expression for loading a register from
6737// a CFA offset.
6739 int64_t OffsetFromDefCFA) {
6740 // This assumes the top of the DWARF stack contains the CFA.
6741 Expr.push_back(dwarf::DW_OP_dup);
6742 // Add the offset to the register.
6743 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6744 // Dereference the address (loads a 64 bit value).
6745 Expr.push_back(dwarf::DW_OP_deref);
6746}
6747
6748// Convenience function to create a comment for
6749// (+/-) NumBytes (* RegScale)?
6750static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6751 StringRef RegScale = {}) {
6752 if (NumBytes) {
6753 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6754 if (!RegScale.empty())
6755 Comment << ' ' << RegScale;
6756 }
6757}
6758
6759// Creates an MCCFIInstruction:
6760// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6762 unsigned Reg,
6763 const StackOffset &Offset) {
6764 int64_t NumBytes, NumVGScaledBytes;
6765 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6766 NumVGScaledBytes);
6767 std::string CommentBuffer;
6768 llvm::raw_string_ostream Comment(CommentBuffer);
6769
6770 if (Reg == AArch64::SP)
6771 Comment << "sp";
6772 else if (Reg == AArch64::FP)
6773 Comment << "fp";
6774 else
6775 Comment << printReg(Reg, &TRI);
6776
6777 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6778 SmallString<64> Expr;
6779 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6780 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6781 // Reg + NumBytes
6782 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6783 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6784 appendOffsetComment(NumBytes, Comment);
6785 if (NumVGScaledBytes) {
6786 // + VG * NumVGScaledBytes
6787 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6788 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6789 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6790 Expr.push_back(dwarf::DW_OP_plus);
6791 }
6792
6793 // Wrap this into DW_CFA_def_cfa_expression.
6794 SmallString<64> DefCfaExpr;
6795 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6796 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6797 DefCfaExpr.append(Expr.str());
6798 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6799 Comment.str());
6800}
6801
6803 unsigned FrameReg, unsigned Reg,
6804 const StackOffset &Offset,
6805 bool LastAdjustmentWasScalable) {
6806 if (Offset.getScalable())
6807 return createDefCFAExpression(TRI, Reg, Offset);
6808
6809 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6810 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6811
6812 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6813 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6814}
6815
6818 const StackOffset &OffsetFromDefCFA,
6819 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6820 int64_t NumBytes, NumVGScaledBytes;
6821 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6822 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6823
6824 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6825
6826 // Non-scalable offsets can use DW_CFA_offset directly.
6827 if (!NumVGScaledBytes)
6828 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6829
6830 std::string CommentBuffer;
6831 llvm::raw_string_ostream Comment(CommentBuffer);
6832 Comment << printReg(Reg, &TRI) << " @ cfa";
6833
6834 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6835 assert(NumVGScaledBytes && "Expected scalable offset");
6836 SmallString<64> OffsetExpr;
6837 // + VG * NumVGScaledBytes
6838 StringRef VGRegScale;
6839 if (IncomingVGOffsetFromDefCFA) {
6840 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6841 VGRegScale = "* IncomingVG";
6842 } else {
6843 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6844 VGRegScale = "* VG";
6845 }
6846 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6847 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6848 OffsetExpr.push_back(dwarf::DW_OP_plus);
6849 if (NumBytes) {
6850 // + NumBytes
6851 appendOffsetComment(NumBytes, Comment);
6852 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6853 }
6854
6855 // Wrap this into DW_CFA_expression
6856 SmallString<64> CfaExpr;
6857 CfaExpr.push_back(dwarf::DW_CFA_expression);
6858 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6859 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6860 CfaExpr.append(OffsetExpr.str());
6861
6862 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6863 Comment.str());
6864}
6865
6866// Helper function to emit a frame offset adjustment from a given
6867// pointer (SrcReg), stored into DestReg. This function is explicit
6868// in that it requires the opcode.
6871 const DebugLoc &DL, unsigned DestReg,
6872 unsigned SrcReg, int64_t Offset, unsigned Opc,
6873 const TargetInstrInfo *TII,
6874 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6875 bool *HasWinCFI, bool EmitCFAOffset,
6876 StackOffset CFAOffset, unsigned FrameReg) {
6877 int Sign = 1;
6878 unsigned MaxEncoding, ShiftSize;
6879 switch (Opc) {
6880 case AArch64::ADDXri:
6881 case AArch64::ADDSXri:
6882 case AArch64::SUBXri:
6883 case AArch64::SUBSXri:
6884 MaxEncoding = 0xfff;
6885 ShiftSize = 12;
6886 break;
6887 case AArch64::ADDVL_XXI:
6888 case AArch64::ADDPL_XXI:
6889 case AArch64::ADDSVL_XXI:
6890 case AArch64::ADDSPL_XXI:
6891 MaxEncoding = 31;
6892 ShiftSize = 0;
6893 if (Offset < 0) {
6894 MaxEncoding = 32;
6895 Sign = -1;
6896 Offset = -Offset;
6897 }
6898 break;
6899 default:
6900 llvm_unreachable("Unsupported opcode");
6901 }
6902
6903 // `Offset` can be in bytes or in "scalable bytes".
6904 int VScale = 1;
6905 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6906 VScale = 16;
6907 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6908 VScale = 2;
6909
6910 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6911 // scratch register. If DestReg is a virtual register, use it as the
6912 // scratch register; otherwise, create a new virtual register (to be
6913 // replaced by the scavenger at the end of PEI). That case can be optimized
6914 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6915 // register can be loaded with offset%8 and the add/sub can use an extending
6916 // instruction with LSL#3.
6917 // Currently the function handles any offsets but generates a poor sequence
6918 // of code.
6919 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6920
6921 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6922 Register TmpReg = DestReg;
6923 if (TmpReg == AArch64::XZR)
6924 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6925 &AArch64::GPR64RegClass);
6926 do {
6927 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6928 unsigned LocalShiftSize = 0;
6929 if (ThisVal > MaxEncoding) {
6930 ThisVal = ThisVal >> ShiftSize;
6931 LocalShiftSize = ShiftSize;
6932 }
6933 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6934 "Encoding cannot handle value that big");
6935
6936 Offset -= ThisVal << LocalShiftSize;
6937 if (Offset == 0)
6938 TmpReg = DestReg;
6939 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6940 .addReg(SrcReg)
6941 .addImm(Sign * (int)ThisVal);
6942 if (ShiftSize)
6943 MBI = MBI.addImm(
6945 MBI = MBI.setMIFlag(Flag);
6946
6947 auto Change =
6948 VScale == 1
6949 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6950 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6951 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6952 CFAOffset += Change;
6953 else
6954 CFAOffset -= Change;
6955 if (EmitCFAOffset && DestReg == TmpReg) {
6956 MachineFunction &MF = *MBB.getParent();
6957 const TargetSubtargetInfo &STI = MF.getSubtarget();
6958 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6959
6960 unsigned CFIIndex = MF.addFrameInst(
6961 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6962 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6963 .addCFIIndex(CFIIndex)
6964 .setMIFlags(Flag);
6965 }
6966
6967 if (NeedsWinCFI) {
6968 int Imm = (int)(ThisVal << LocalShiftSize);
6969 if (VScale != 1 && DestReg == AArch64::SP) {
6970 if (HasWinCFI)
6971 *HasWinCFI = true;
6972 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6973 .addImm(ThisVal)
6974 .setMIFlag(Flag);
6975 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6976 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6977 assert(VScale == 1 && "Expected non-scalable operation");
6978 if (HasWinCFI)
6979 *HasWinCFI = true;
6980 if (Imm == 0)
6981 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6982 else
6983 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6984 .addImm(Imm)
6985 .setMIFlag(Flag);
6986 assert(Offset == 0 && "Expected remaining offset to be zero to "
6987 "emit a single SEH directive");
6988 } else if (DestReg == AArch64::SP) {
6989 assert(VScale == 1 && "Expected non-scalable operation");
6990 if (HasWinCFI)
6991 *HasWinCFI = true;
6992 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6993 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6994 .addImm(Imm)
6995 .setMIFlag(Flag);
6996 }
6997 }
6998
6999 SrcReg = TmpReg;
7000 } while (Offset);
7001}
7002
7005 unsigned DestReg, unsigned SrcReg,
7007 MachineInstr::MIFlag Flag, bool SetNZCV,
7008 bool NeedsWinCFI, bool *HasWinCFI,
7009 bool EmitCFAOffset, StackOffset CFAOffset,
7010 unsigned FrameReg) {
7011 // If a function is marked as arm_locally_streaming, then the runtime value of
7012 // vscale in the prologue/epilogue is different from the runtime value of vscale
7013 // in the function's body. To avoid having to consider multiple vscales,
7014 // we can use `addsvl` to allocate any scalable stack-slots, which under
7015 // most circumstances will be only locals, not callee-save slots.
7016 const Function &F = MBB.getParent()->getFunction();
7017 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
7018
7019 int64_t Bytes, NumPredicateVectors, NumDataVectors;
7020 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
7021 Offset, Bytes, NumPredicateVectors, NumDataVectors);
7022
7023 // Insert ADDSXri for scalable offset at the end.
7024 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
7025 if (NeedsFinalDefNZCV)
7026 SetNZCV = false;
7027
7028 // First emit non-scalable frame offsets, or a simple 'mov'.
7029 if (Bytes || (!Offset && SrcReg != DestReg)) {
7030 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
7031 "SP increment/decrement not 8-byte aligned");
7032 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
7033 if (Bytes < 0) {
7034 Bytes = -Bytes;
7035 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
7036 }
7037 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
7038 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
7039 FrameReg);
7040 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
7041 ? StackOffset::getFixed(-Bytes)
7042 : StackOffset::getFixed(Bytes);
7043 SrcReg = DestReg;
7044 FrameReg = DestReg;
7045 }
7046
7047 assert(!(NeedsWinCFI && NumPredicateVectors) &&
7048 "WinCFI can't allocate fractions of an SVE data vector");
7049
7050 if (NumDataVectors) {
7051 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
7052 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
7053 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
7054 FrameReg);
7055 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
7056 SrcReg = DestReg;
7057 }
7058
7059 if (NumPredicateVectors) {
7060 assert(DestReg != AArch64::SP && "Unaligned access to SP");
7061 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
7062 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
7063 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
7064 FrameReg);
7065 }
7066
7067 if (NeedsFinalDefNZCV)
7068 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
7069 .addReg(DestReg)
7070 .addImm(0)
7071 .addImm(0);
7072}
7073
7076 int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS,
7077 VirtRegMap *VRM) const {
7079 // This is a bit of a hack. Consider this instruction:
7080 //
7081 // %0 = COPY %sp; GPR64all:%0
7082 //
7083 // We explicitly chose GPR64all for the virtual register so such a copy might
7084 // be eliminated by RegisterCoalescer. However, that may not be possible, and
7085 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
7086 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
7087 //
7088 // To prevent that, we are going to constrain the %0 register class here.
7089 if (MI.isFullCopy()) {
7090 Register DstReg = MI.getOperand(0).getReg();
7091 Register SrcReg = MI.getOperand(1).getReg();
7092 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
7093 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
7094 return nullptr;
7095 }
7096 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
7097 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
7098 return nullptr;
7099 }
7100 // Nothing can be folded with a copy from/to NZCV.
7101 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
7102 return nullptr;
7103 }
7104
7105 // Handle the case where a copy is being spilled or filled but the source
7106 // and destination register class don't match. For example:
7107 //
7108 // %0 = COPY %xzr; GPR64common:%0
7109 //
7110 // In this case we can still safely fold away the COPY and generate the
7111 // following spill code:
7112 //
7113 // STRXui %xzr, %stack.0
7114 //
7115 // This also eliminates spilled cross register class COPYs (e.g. between x and
7116 // d regs) of the same size. For example:
7117 //
7118 // %0 = COPY %1; GPR64:%0, FPR64:%1
7119 //
7120 // will be filled as
7121 //
7122 // LDRDui %0, fi<#0>
7123 //
7124 // instead of
7125 //
7126 // LDRXui %Temp, fi<#0>
7127 // %0 = FMOV %Temp
7128 //
7129 if (MI.isCopy() && Ops.size() == 1 &&
7130 // Make sure we're only folding the explicit COPY defs/uses.
7131 (Ops[0] == 0 || Ops[0] == 1)) {
7132 bool IsSpill = Ops[0] == 0;
7133 bool IsFill = !IsSpill;
7135 const MachineRegisterInfo &MRI = MF.getRegInfo();
7136 MachineBasicBlock &MBB = *MI.getParent();
7137 const MachineOperand &DstMO = MI.getOperand(0);
7138 const MachineOperand &SrcMO = MI.getOperand(1);
7139 Register DstReg = DstMO.getReg();
7140 Register SrcReg = SrcMO.getReg();
7141 // This is slightly expensive to compute for physical regs since
7142 // getMinimalPhysRegClass is slow.
7143 auto getRegClass = [&](unsigned Reg) {
7144 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
7145 : TRI.getMinimalPhysRegClass(Reg);
7146 };
7147
7148 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
7149 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
7150 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
7151 "Mismatched register size in non subreg COPY");
7152 if (IsSpill)
7153 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
7154 getRegClass(SrcReg), Register());
7155 else
7156 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
7157 getRegClass(DstReg), Register());
7158 return &*--InsertPt;
7159 }
7160
7161 // Handle cases like spilling def of:
7162 //
7163 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
7164 //
7165 // where the physical register source can be widened and stored to the full
7166 // virtual reg destination stack slot, in this case producing:
7167 //
7168 // STRXui %xzr, %stack.0
7169 //
7170 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
7171 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
7172 assert(SrcMO.getSubReg() == 0 &&
7173 "Unexpected subreg on physical register");
7174 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
7175 FrameIndex, &AArch64::GPR64RegClass, Register());
7176 return &*--InsertPt;
7177 }
7178
7179 // Handle cases like filling use of:
7180 //
7181 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
7182 //
7183 // where we can load the full virtual reg source stack slot, into the subreg
7184 // destination, in this case producing:
7185 //
7186 // LDRWui %0:sub_32<def,read-undef>, %stack.0
7187 //
7188 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
7189 const TargetRegisterClass *FillRC = nullptr;
7190 switch (DstMO.getSubReg()) {
7191 default:
7192 break;
7193 case AArch64::sub_32:
7194 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
7195 FillRC = &AArch64::GPR32RegClass;
7196 break;
7197 case AArch64::ssub:
7198 FillRC = &AArch64::FPR32RegClass;
7199 break;
7200 case AArch64::dsub:
7201 FillRC = &AArch64::FPR64RegClass;
7202 break;
7203 }
7204
7205 if (FillRC) {
7206 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
7207 TRI.getRegSizeInBits(*FillRC) &&
7208 "Mismatched regclass size on folded subreg COPY");
7209 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC,
7210 Register());
7211 MachineInstr &LoadMI = *--InsertPt;
7212 MachineOperand &LoadDst = LoadMI.getOperand(0);
7213 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
7214 LoadDst.setSubReg(DstMO.getSubReg());
7215 LoadDst.setIsUndef();
7216 return &LoadMI;
7217 }
7218 }
7219 }
7220
7221 // Cannot fold.
7222 return nullptr;
7223}
7224
7226 StackOffset &SOffset,
7227 bool *OutUseUnscaledOp,
7228 unsigned *OutUnscaledOp,
7229 int64_t *EmittableOffset) {
7230 // Set output values in case of early exit.
7231 if (EmittableOffset)
7232 *EmittableOffset = 0;
7233 if (OutUseUnscaledOp)
7234 *OutUseUnscaledOp = false;
7235 if (OutUnscaledOp)
7236 *OutUnscaledOp = 0;
7237
7238 // Exit early for structured vector spills/fills as they can't take an
7239 // immediate offset.
7240 switch (MI.getOpcode()) {
7241 default:
7242 break;
7243 case AArch64::LD1Rv1d:
7244 case AArch64::LD1Rv2s:
7245 case AArch64::LD1Rv2d:
7246 case AArch64::LD1Rv4h:
7247 case AArch64::LD1Rv4s:
7248 case AArch64::LD1Rv8b:
7249 case AArch64::LD1Rv8h:
7250 case AArch64::LD1Rv16b:
7251 case AArch64::LD1Twov2d:
7252 case AArch64::LD1Threev2d:
7253 case AArch64::LD1Fourv2d:
7254 case AArch64::LD1Twov1d:
7255 case AArch64::LD1Threev1d:
7256 case AArch64::LD1Fourv1d:
7257 case AArch64::ST1Twov2d:
7258 case AArch64::ST1Threev2d:
7259 case AArch64::ST1Fourv2d:
7260 case AArch64::ST1Twov1d:
7261 case AArch64::ST1Threev1d:
7262 case AArch64::ST1Fourv1d:
7263 case AArch64::ST1i8:
7264 case AArch64::ST1i16:
7265 case AArch64::ST1i32:
7266 case AArch64::ST1i64:
7267 case AArch64::IRG:
7268 case AArch64::IRGstack:
7269 case AArch64::STGloop:
7270 case AArch64::STZGloop:
7272 }
7273
7274 // Get the min/max offset and the scale.
7275 TypeSize ScaleValue(0U, false), Width(0U, false);
7276 int64_t MinOff, MaxOff;
7277 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
7278 MaxOff))
7279 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
7280
7281 // Construct the complete offset.
7282 bool IsMulVL = ScaleValue.isScalable();
7283 unsigned Scale = ScaleValue.getKnownMinValue();
7284 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
7285
7286 const MachineOperand &ImmOpnd =
7287 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
7288 Offset += ImmOpnd.getImm() * Scale;
7289
7290 // If the offset doesn't match the scale, we rewrite the instruction to
7291 // use the unscaled instruction instead. Likewise, if we have a negative
7292 // offset and there is an unscaled op to use.
7293 std::optional<unsigned> UnscaledOp =
7295 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
7296 if (useUnscaledOp &&
7297 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
7298 MaxOff))
7299 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
7300
7301 Scale = ScaleValue.getKnownMinValue();
7302 assert(IsMulVL == ScaleValue.isScalable() &&
7303 "Unscaled opcode has different value for scalable");
7304
7305 int64_t Remainder = Offset % Scale;
7306 assert(!(Remainder && useUnscaledOp) &&
7307 "Cannot have remainder when using unscaled op");
7308
7309 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
7310 int64_t NewOffset = Offset / Scale;
7311 if (MinOff <= NewOffset && NewOffset <= MaxOff)
7312 Offset = Remainder;
7313 else {
7314 // Try to minimise the number of instructions required to materialise the
7315 // offset calculation. Specifically, for fixed offsets, if masking out the
7316 // low 12 bits leaves a legal add immediate, we can realise the offset
7317 // calculation with a single add instruction. Whenever this is possible,
7318 // prefer this split.
7319 int64_t HighPart = Offset & ~0xFFF;
7320 int64_t LowPart = Offset & 0xFFF;
7321 int64_t LowScaled = LowPart / Scale;
7322 if (!IsMulVL && NewOffset >= 0 && LowPart % Scale == 0 &&
7323 MinOff <= LowScaled && LowScaled <= MaxOff &&
7325 NewOffset = LowScaled;
7326 Offset = HighPart;
7327 } else {
7328 // Default to a greedy split: take the memop immediate to be maximum /
7329 // minimum expressible offset and materialise the remainder.
7330 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
7331 Offset = Offset - (NewOffset * Scale);
7332 }
7333 }
7334
7335 if (EmittableOffset)
7336 *EmittableOffset = NewOffset;
7337 if (OutUseUnscaledOp)
7338 *OutUseUnscaledOp = useUnscaledOp;
7339 if (OutUnscaledOp && UnscaledOp)
7340 *OutUnscaledOp = *UnscaledOp;
7341
7342 if (IsMulVL)
7343 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
7344 else
7345 SOffset = StackOffset::get(Offset, SOffset.getScalable());
7347 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
7348}
7349
7351 unsigned FrameReg, StackOffset &Offset,
7352 const AArch64InstrInfo *TII) {
7353 unsigned Opcode = MI.getOpcode();
7354 unsigned ImmIdx = FrameRegIdx + 1;
7355
7356 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
7357 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
7358 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
7359 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
7360 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
7361 MI.eraseFromParent();
7362 Offset = StackOffset();
7363 return true;
7364 }
7365
7366 int64_t NewOffset;
7367 unsigned UnscaledOp;
7368 bool UseUnscaledOp;
7369 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
7370 &UnscaledOp, &NewOffset);
7373 // Replace the FrameIndex with FrameReg.
7374 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
7375 if (UseUnscaledOp)
7376 MI.setDesc(TII->get(UnscaledOp));
7377
7378 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
7379 return !Offset;
7380 }
7381
7382 return false;
7383}
7384
7390
7391MCInst AArch64InstrInfo::getNop() const { return MCInstBuilder(AArch64::NOP); }
7392
7393// AArch64 supports MachineCombiner.
7394bool AArch64InstrInfo::useMachineCombiner() const { return true; }
7395
7396// True when Opc sets flag
7397static bool isCombineInstrSettingFlag(unsigned Opc) {
7398 switch (Opc) {
7399 case AArch64::ADDSWrr:
7400 case AArch64::ADDSWri:
7401 case AArch64::ADDSXrr:
7402 case AArch64::ADDSXri:
7403 case AArch64::SUBSWrr:
7404 case AArch64::SUBSXrr:
7405 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7406 case AArch64::SUBSWri:
7407 case AArch64::SUBSXri:
7408 return true;
7409 default:
7410 break;
7411 }
7412 return false;
7413}
7414
7415// 32b Opcodes that can be combined with a MUL
7416static bool isCombineInstrCandidate32(unsigned Opc) {
7417 switch (Opc) {
7418 case AArch64::ADDWrr:
7419 case AArch64::ADDWri:
7420 case AArch64::SUBWrr:
7421 case AArch64::ADDSWrr:
7422 case AArch64::ADDSWri:
7423 case AArch64::SUBSWrr:
7424 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7425 case AArch64::SUBWri:
7426 case AArch64::SUBSWri:
7427 return true;
7428 default:
7429 break;
7430 }
7431 return false;
7432}
7433
7434// 64b Opcodes that can be combined with a MUL
7435static bool isCombineInstrCandidate64(unsigned Opc) {
7436 switch (Opc) {
7437 case AArch64::ADDXrr:
7438 case AArch64::ADDXri:
7439 case AArch64::SUBXrr:
7440 case AArch64::ADDSXrr:
7441 case AArch64::ADDSXri:
7442 case AArch64::SUBSXrr:
7443 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7444 case AArch64::SUBXri:
7445 case AArch64::SUBSXri:
7446 case AArch64::ADDv8i8:
7447 case AArch64::ADDv16i8:
7448 case AArch64::ADDv4i16:
7449 case AArch64::ADDv8i16:
7450 case AArch64::ADDv2i32:
7451 case AArch64::ADDv4i32:
7452 case AArch64::SUBv8i8:
7453 case AArch64::SUBv16i8:
7454 case AArch64::SUBv4i16:
7455 case AArch64::SUBv8i16:
7456 case AArch64::SUBv2i32:
7457 case AArch64::SUBv4i32:
7458 return true;
7459 default:
7460 break;
7461 }
7462 return false;
7463}
7464
7465// FP Opcodes that can be combined with a FMUL.
7466static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
7467 switch (Inst.getOpcode()) {
7468 default:
7469 break;
7470 case AArch64::FADDHrr:
7471 case AArch64::FADDSrr:
7472 case AArch64::FADDDrr:
7473 case AArch64::FADDv4f16:
7474 case AArch64::FADDv8f16:
7475 case AArch64::FADDv2f32:
7476 case AArch64::FADDv2f64:
7477 case AArch64::FADDv4f32:
7478 case AArch64::FSUBHrr:
7479 case AArch64::FSUBSrr:
7480 case AArch64::FSUBDrr:
7481 case AArch64::FSUBv4f16:
7482 case AArch64::FSUBv8f16:
7483 case AArch64::FSUBv2f32:
7484 case AArch64::FSUBv2f64:
7485 case AArch64::FSUBv4f32:
7487 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
7488 // the target options or if FADD/FSUB has the contract fast-math flag.
7489 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
7491 }
7492 return false;
7493}
7494
7495// Opcodes that can be combined with a MUL
7499
7500//
7501// Utility routine that checks if \param MO is defined by an
7502// \param CombineOpc instruction in the basic block \param MBB
7504 unsigned CombineOpc, unsigned ZeroReg = 0,
7505 bool CheckZeroReg = false) {
7506 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7507 MachineInstr *MI = nullptr;
7508
7509 if (MO.isReg() && MO.getReg().isVirtual())
7510 MI = MRI.getUniqueVRegDef(MO.getReg());
7511 // And it needs to be in the trace (otherwise, it won't have a depth).
7512 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
7513 return false;
7514 // Must only be used by the user we combine with.
7515 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
7516 return false;
7517
7518 if (CheckZeroReg) {
7519 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
7520 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
7521 MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
7522 // The third input reg must be zero.
7523 if (MI->getOperand(3).getReg() != ZeroReg)
7524 return false;
7525 }
7526
7527 if (isCombineInstrSettingFlag(CombineOpc) &&
7528 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
7529 return false;
7530
7531 return true;
7532}
7533
7534//
7535// Is \param MO defined by an integer multiply and can be combined?
7537 unsigned MulOpc, unsigned ZeroReg) {
7538 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
7539}
7540
7541//
7542// Is \param MO defined by a floating-point multiply and can be combined?
7544 unsigned MulOpc) {
7545 return canCombine(MBB, MO, MulOpc);
7546}
7547
7548// TODO: There are many more machine instruction opcodes to match:
7549// 1. Other data types (integer, vectors)
7550// 2. Other math / logic operations (xor, or)
7551// 3. Other forms of the same operation (intrinsics and other variants)
7552bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
7553 bool Invert) const {
7554 if (Invert)
7555 return false;
7556 switch (Inst.getOpcode()) {
7557 // == Floating-point types ==
7558 // -- Floating-point instructions --
7559 case AArch64::FADDHrr:
7560 case AArch64::FADDSrr:
7561 case AArch64::FADDDrr:
7562 case AArch64::FMULHrr:
7563 case AArch64::FMULSrr:
7564 case AArch64::FMULDrr:
7565 case AArch64::FMULX16:
7566 case AArch64::FMULX32:
7567 case AArch64::FMULX64:
7568 // -- Advanced SIMD instructions --
7569 case AArch64::FADDv4f16:
7570 case AArch64::FADDv8f16:
7571 case AArch64::FADDv2f32:
7572 case AArch64::FADDv4f32:
7573 case AArch64::FADDv2f64:
7574 case AArch64::FMULv4f16:
7575 case AArch64::FMULv8f16:
7576 case AArch64::FMULv2f32:
7577 case AArch64::FMULv4f32:
7578 case AArch64::FMULv2f64:
7579 case AArch64::FMULXv4f16:
7580 case AArch64::FMULXv8f16:
7581 case AArch64::FMULXv2f32:
7582 case AArch64::FMULXv4f32:
7583 case AArch64::FMULXv2f64:
7584 // -- SVE instructions --
7585 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
7586 // in the SVE instruction set (though there are predicated ones).
7587 case AArch64::FADD_ZZZ_H:
7588 case AArch64::FADD_ZZZ_S:
7589 case AArch64::FADD_ZZZ_D:
7590 case AArch64::FMUL_ZZZ_H:
7591 case AArch64::FMUL_ZZZ_S:
7592 case AArch64::FMUL_ZZZ_D:
7595
7596 // == Integer types ==
7597 // -- Base instructions --
7598 // Opcodes MULWrr and MULXrr don't exist because
7599 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
7600 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
7601 // The machine-combiner does not support three-source-operands machine
7602 // instruction. So we cannot reassociate MULs.
7603 case AArch64::ADDWrr:
7604 case AArch64::ADDXrr:
7605 case AArch64::ANDWrr:
7606 case AArch64::ANDXrr:
7607 case AArch64::ORRWrr:
7608 case AArch64::ORRXrr:
7609 case AArch64::EORWrr:
7610 case AArch64::EORXrr:
7611 case AArch64::EONWrr:
7612 case AArch64::EONXrr:
7613 // -- Advanced SIMD instructions --
7614 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
7615 // in the Advanced SIMD instruction set.
7616 case AArch64::ADDv8i8:
7617 case AArch64::ADDv16i8:
7618 case AArch64::ADDv4i16:
7619 case AArch64::ADDv8i16:
7620 case AArch64::ADDv2i32:
7621 case AArch64::ADDv4i32:
7622 case AArch64::ADDv1i64:
7623 case AArch64::ADDv2i64:
7624 case AArch64::MULv8i8:
7625 case AArch64::MULv16i8:
7626 case AArch64::MULv4i16:
7627 case AArch64::MULv8i16:
7628 case AArch64::MULv2i32:
7629 case AArch64::MULv4i32:
7630 case AArch64::ANDv8i8:
7631 case AArch64::ANDv16i8:
7632 case AArch64::ORRv8i8:
7633 case AArch64::ORRv16i8:
7634 case AArch64::EORv8i8:
7635 case AArch64::EORv16i8:
7636 // -- SVE instructions --
7637 case AArch64::ADD_ZZZ_B:
7638 case AArch64::ADD_ZZZ_H:
7639 case AArch64::ADD_ZZZ_S:
7640 case AArch64::ADD_ZZZ_D:
7641 case AArch64::MUL_ZZZ_B:
7642 case AArch64::MUL_ZZZ_H:
7643 case AArch64::MUL_ZZZ_S:
7644 case AArch64::MUL_ZZZ_D:
7645 case AArch64::AND_ZZZ:
7646 case AArch64::ORR_ZZZ:
7647 case AArch64::EOR_ZZZ:
7648 return true;
7649
7650 default:
7651 return false;
7652 }
7653}
7654
7655/// Find instructions that can be turned into madd.
7657 SmallVectorImpl<unsigned> &Patterns) {
7658 unsigned Opc = Root.getOpcode();
7659 MachineBasicBlock &MBB = *Root.getParent();
7660 bool Found = false;
7661
7663 return false;
7665 int Cmp_NZCV =
7666 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
7667 // When NZCV is live bail out.
7668 if (Cmp_NZCV == -1)
7669 return false;
7670 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
7671 // When opcode can't change bail out.
7672 // CHECKME: do we miss any cases for opcode conversion?
7673 if (NewOpc == Opc)
7674 return false;
7675 Opc = NewOpc;
7676 }
7677
7678 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
7679 unsigned Pattern) {
7680 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
7681 Patterns.push_back(Pattern);
7682 Found = true;
7683 }
7684 };
7685
7686 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
7687 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
7688 Patterns.push_back(Pattern);
7689 Found = true;
7690 }
7691 };
7692
7694
7695 switch (Opc) {
7696 default:
7697 break;
7698 case AArch64::ADDWrr:
7699 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7700 "ADDWrr does not have register operands");
7701 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
7702 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
7703 break;
7704 case AArch64::ADDXrr:
7705 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
7706 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
7707 break;
7708 case AArch64::SUBWrr:
7709 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
7710 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
7711 break;
7712 case AArch64::SUBXrr:
7713 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
7714 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
7715 break;
7716 case AArch64::ADDWri:
7717 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
7718 break;
7719 case AArch64::ADDXri:
7720 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
7721 break;
7722 case AArch64::SUBWri:
7723 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
7724 break;
7725 case AArch64::SUBXri:
7726 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
7727 break;
7728 case AArch64::ADDv8i8:
7729 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7730 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7731 break;
7732 case AArch64::ADDv16i8:
7733 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7734 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7735 break;
7736 case AArch64::ADDv4i16:
7737 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7738 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7739 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7740 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7741 break;
7742 case AArch64::ADDv8i16:
7743 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7744 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7745 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7746 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7747 break;
7748 case AArch64::ADDv2i32:
7749 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7750 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7751 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7752 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7753 break;
7754 case AArch64::ADDv4i32:
7755 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7756 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7757 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7758 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7759 break;
7760 case AArch64::SUBv8i8:
7761 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7762 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7763 break;
7764 case AArch64::SUBv16i8:
7765 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7766 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7767 break;
7768 case AArch64::SUBv4i16:
7769 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7770 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7771 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7772 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7773 break;
7774 case AArch64::SUBv8i16:
7775 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7776 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7777 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7778 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7779 break;
7780 case AArch64::SUBv2i32:
7781 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7782 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7783 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7784 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7785 break;
7786 case AArch64::SUBv4i32:
7787 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7788 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7789 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7790 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7791 break;
7792 }
7793 return Found;
7794}
7795
7796bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7797 switch (Opcode) {
7798 default:
7799 break;
7800 case AArch64::UABALB_ZZZ_D:
7801 case AArch64::UABALB_ZZZ_H:
7802 case AArch64::UABALB_ZZZ_S:
7803 case AArch64::UABALT_ZZZ_D:
7804 case AArch64::UABALT_ZZZ_H:
7805 case AArch64::UABALT_ZZZ_S:
7806 case AArch64::SABALB_ZZZ_D:
7807 case AArch64::SABALB_ZZZ_S:
7808 case AArch64::SABALB_ZZZ_H:
7809 case AArch64::SABALT_ZZZ_D:
7810 case AArch64::SABALT_ZZZ_S:
7811 case AArch64::SABALT_ZZZ_H:
7812 case AArch64::UABALv16i8_v8i16:
7813 case AArch64::UABALv2i32_v2i64:
7814 case AArch64::UABALv4i16_v4i32:
7815 case AArch64::UABALv4i32_v2i64:
7816 case AArch64::UABALv8i16_v4i32:
7817 case AArch64::UABALv8i8_v8i16:
7818 case AArch64::UABAv16i8:
7819 case AArch64::UABAv2i32:
7820 case AArch64::UABAv4i16:
7821 case AArch64::UABAv4i32:
7822 case AArch64::UABAv8i16:
7823 case AArch64::UABAv8i8:
7824 case AArch64::SABALv16i8_v8i16:
7825 case AArch64::SABALv2i32_v2i64:
7826 case AArch64::SABALv4i16_v4i32:
7827 case AArch64::SABALv4i32_v2i64:
7828 case AArch64::SABALv8i16_v4i32:
7829 case AArch64::SABALv8i8_v8i16:
7830 case AArch64::SABAv16i8:
7831 case AArch64::SABAv2i32:
7832 case AArch64::SABAv4i16:
7833 case AArch64::SABAv4i32:
7834 case AArch64::SABAv8i16:
7835 case AArch64::SABAv8i8:
7836 return true;
7837 }
7838
7839 return false;
7840}
7841
7842unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7843 unsigned AccumulationOpcode) const {
7844 switch (AccumulationOpcode) {
7845 default:
7846 llvm_unreachable("Unsupported accumulation Opcode!");
7847 case AArch64::UABALB_ZZZ_D:
7848 return AArch64::UABDLB_ZZZ_D;
7849 case AArch64::UABALB_ZZZ_H:
7850 return AArch64::UABDLB_ZZZ_H;
7851 case AArch64::UABALB_ZZZ_S:
7852 return AArch64::UABDLB_ZZZ_S;
7853 case AArch64::UABALT_ZZZ_D:
7854 return AArch64::UABDLT_ZZZ_D;
7855 case AArch64::UABALT_ZZZ_H:
7856 return AArch64::UABDLT_ZZZ_H;
7857 case AArch64::UABALT_ZZZ_S:
7858 return AArch64::UABDLT_ZZZ_S;
7859 case AArch64::UABALv16i8_v8i16:
7860 return AArch64::UABDLv16i8_v8i16;
7861 case AArch64::UABALv2i32_v2i64:
7862 return AArch64::UABDLv2i32_v2i64;
7863 case AArch64::UABALv4i16_v4i32:
7864 return AArch64::UABDLv4i16_v4i32;
7865 case AArch64::UABALv4i32_v2i64:
7866 return AArch64::UABDLv4i32_v2i64;
7867 case AArch64::UABALv8i16_v4i32:
7868 return AArch64::UABDLv8i16_v4i32;
7869 case AArch64::UABALv8i8_v8i16:
7870 return AArch64::UABDLv8i8_v8i16;
7871 case AArch64::UABAv16i8:
7872 return AArch64::UABDv16i8;
7873 case AArch64::UABAv2i32:
7874 return AArch64::UABDv2i32;
7875 case AArch64::UABAv4i16:
7876 return AArch64::UABDv4i16;
7877 case AArch64::UABAv4i32:
7878 return AArch64::UABDv4i32;
7879 case AArch64::UABAv8i16:
7880 return AArch64::UABDv8i16;
7881 case AArch64::UABAv8i8:
7882 return AArch64::UABDv8i8;
7883 case AArch64::SABALB_ZZZ_D:
7884 return AArch64::SABDLB_ZZZ_D;
7885 case AArch64::SABALB_ZZZ_S:
7886 return AArch64::SABDLB_ZZZ_S;
7887 case AArch64::SABALB_ZZZ_H:
7888 return AArch64::SABDLB_ZZZ_H;
7889 case AArch64::SABALT_ZZZ_D:
7890 return AArch64::SABDLT_ZZZ_D;
7891 case AArch64::SABALT_ZZZ_S:
7892 return AArch64::SABDLT_ZZZ_S;
7893 case AArch64::SABALT_ZZZ_H:
7894 return AArch64::SABDLT_ZZZ_H;
7895 case AArch64::SABALv16i8_v8i16:
7896 return AArch64::SABDLv16i8_v8i16;
7897 case AArch64::SABALv2i32_v2i64:
7898 return AArch64::SABDLv2i32_v2i64;
7899 case AArch64::SABALv4i16_v4i32:
7900 return AArch64::SABDLv4i16_v4i32;
7901 case AArch64::SABALv4i32_v2i64:
7902 return AArch64::SABDLv4i32_v2i64;
7903 case AArch64::SABALv8i16_v4i32:
7904 return AArch64::SABDLv8i16_v4i32;
7905 case AArch64::SABALv8i8_v8i16:
7906 return AArch64::SABDLv8i8_v8i16;
7907 case AArch64::SABAv16i8:
7908 return AArch64::SABDv16i8;
7909 case AArch64::SABAv2i32:
7910 return AArch64::SABAv2i32;
7911 case AArch64::SABAv4i16:
7912 return AArch64::SABDv4i16;
7913 case AArch64::SABAv4i32:
7914 return AArch64::SABDv4i32;
7915 case AArch64::SABAv8i16:
7916 return AArch64::SABDv8i16;
7917 case AArch64::SABAv8i8:
7918 return AArch64::SABDv8i8;
7919 }
7920}
7921
7922/// Floating-Point Support
7923
7924/// Find instructions that can be turned into madd.
///
/// For each FADD/FSUB opcode handled in the switch below, the source operands
/// of \p Root are tested with canCombineWithFMUL against the multiply opcodes
/// listed for that case; every successful test appends the associated
/// combiner pattern to \p Patterns.
/// \returns true if at least one pattern was appended.
7926 SmallVectorImpl<unsigned> &Patterns) {
7927
7928 if (!isCombineInstrCandidateFP(Root))
7929 return false;
7930
7931 MachineBasicBlock &MBB = *Root.getParent();
7932 bool Found = false;
7933
 // Appends Pattern when operand `Operand` of Root can be combined with a
 // multiply of opcode `Opcode`; reports whether it did so.
7934 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7935 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7936 Patterns.push_back(Pattern);
7937 return true;
7938 }
7939 return false;
7940 };
7941
7943
 // Where two Match calls are joined with `||`, the second one is only tried
 // when the first fails, so such an operand contributes at most one pattern.
7944 switch (Root.getOpcode()) {
7945 default:
 // If the assert is compiled out, control reaches `return Found` with
 // Found still false.
7946 assert(false && "Unsupported FP instruction in combiner\n");
7947 break;
7948 case AArch64::FADDHrr:
7949 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7950 "FADDHrr does not have register operands");
7951
7952 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7953 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7954 break;
7955 case AArch64::FADDSrr:
7956 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7957 "FADDSrr does not have register operands");
7958
7959 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7960 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7961
7962 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7963 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7964 break;
7965 case AArch64::FADDDrr:
7966 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7967 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7968
7969 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7970 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7971 break;
 // Vector FADD: the indexed multiply is tried before the plain vector one.
7972 case AArch64::FADDv4f16:
7973 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7974 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7975
7976 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7977 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7978 break;
7979 case AArch64::FADDv8f16:
7980 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7981 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7982
7983 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7984 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7985 break;
7986 case AArch64::FADDv2f32:
7987 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7988 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7989
7990 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7991 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7992 break;
7993 case AArch64::FADDv2f64:
7994 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7995 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7996
7997 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7998 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7999 break;
8000 case AArch64::FADDv4f32:
8001 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
8002 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
8003
8004 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
8005 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
8006 break;
 // Scalar FSUB: in addition to FMUL on either operand, an FNMUL feeding
 // operand 1 is matched.
8007 case AArch64::FSUBHrr:
8008 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
8009 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
8010 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
8011 break;
8012 case AArch64::FSUBSrr:
8013 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
8014
8015 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
8016 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
8017
8018 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
8019 break;
8020 case AArch64::FSUBDrr:
8021 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
8022
8023 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
8024 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
8025
8026 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
8027 break;
 // Vector FSUB: operand 2 is examined before operand 1, so its pattern (if
 // any) is pushed first.
8028 case AArch64::FSUBv4f16:
8029 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
8030 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
8031
8032 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
8033 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
8034 break;
8035 case AArch64::FSUBv8f16:
8036 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
8037 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
8038
8039 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
8040 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
8041 break;
8042 case AArch64::FSUBv2f32:
8043 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
8044 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
8045
8046 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
8047 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
8048 break;
8049 case AArch64::FSUBv2f64:
8050 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
8051 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
8052
8053 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
8054 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
8055 break;
8056 case AArch64::FSUBv4f32:
8057 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
8058 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
8059
8060 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
8061 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
8062 break;
8063 }
8064 return Found;
8065}
8066
/// Record an FMUL*_indexed pattern for each source operand of a vector FMUL
/// \p Root whose unique virtual-register definition is the matching DUP*lane
/// instruction, optionally reached through one COPY.
/// \returns true if at least one pattern was appended to \p Patterns; false
/// for any root opcode not listed in the switch.
8068 SmallVectorImpl<unsigned> &Patterns) {
8069 MachineBasicBlock &MBB = *Root.getParent();
8070 bool Found = false;
8071
 // Appends Pattern when operand `Operand` of Root is defined by an
 // instruction with opcode `Opcode`.
8072 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
8073 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8074 MachineOperand &MO = Root.getOperand(Operand);
8075 MachineInstr *MI = nullptr;
 // Only virtual registers are followed to their unique definition.
8076 if (MO.isReg() && MO.getReg().isVirtual())
8077 MI = MRI.getUniqueVRegDef(MO.getReg());
8078 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
8079 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
8080 MI->getOperand(1).getReg().isVirtual())
8081 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
8082 if (MI && MI->getOpcode() == Opcode) {
8083 Patterns.push_back(Pattern);
8084 return true;
8085 }
8086 return false;
8087 };
8088
8090
 // Both operands are always tested, so up to two patterns may be recorded.
8091 switch (Root.getOpcode()) {
8092 default:
8093 return false;
8094 case AArch64::FMULv2f32:
8095 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
8096 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
8097 break;
8098 case AArch64::FMULv2f64:
8099 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
8100 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
8101 break;
8102 case AArch64::FMULv4f16:
8103 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
8104 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
8105 break;
8106 case AArch64::FMULv4f32:
8107 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
8108 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
8109 break;
8110 case AArch64::FMULv8f16:
8111 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
8112 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
8113 break;
8114 }
8115
8116 return Found;
8117}
8118
/// Record the FNMADD combiner pattern when \p Root is FNEGDr/FNEGSr and the
/// instruction examined by Match is an FMADDDrrr/FMADDSrrr respectively.
/// \returns true if the pattern was appended to \p Patterns.
8120 SmallVectorImpl<unsigned> &Patterns) {
8121 unsigned Opc = Root.getOpcode();
8122 MachineBasicBlock &MBB = *Root.getParent();
8123 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8124
 // Appends Pattern when MI has opcode `Opcode`, its result has exactly one
 // non-debug use, and the flag checks hold.
8125 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
8126 MachineOperand &MO = Root.getOperand(1);
8128 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
8129 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
8133 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
8134 Patterns.push_back(Pattern);
8135 return true;
8136 }
8137 return false;
8138 };
8139
 // Any other root opcode yields no pattern.
8140 switch (Opc) {
8141 default:
8142 break;
8143 case AArch64::FNEGDr:
8144 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
8145 case AArch64::FNEGSr:
8146 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
8147 }
8148
8149 return false;
8150}
8151
8152/// Return true when a code sequence can improve throughput. It
8153/// should be called only for instructions in loops.
8154/// \param Pattern - combiner pattern
8156 switch (Pattern) {
8157 default:
 // Patterns without an explicit case leave the switch and yield false below.
8158 break;
8264 return true;
8265 } // end switch (Pattern)
8266 return false;
8267}
8268
8269/// Find other MI combine patterns.
///
/// Only SUB[S]Wrr / SUB[S]Xrr roots are considered. The final check succeeds
/// when operand 2 of \p Root can be combined (per canCombine) with an
/// ADD[S]Wrr / ADD[S]Xrr.
/// \returns true when that check succeeds, false otherwise.
8271 SmallVectorImpl<unsigned> &Patterns) {
8272 // A - (B + C) ==> (A - B) - C or (A - C) - B
8273 unsigned Opc = Root.getOpcode();
8274 MachineBasicBlock &MBB = *Root.getParent();
8275
8276 switch (Opc) {
8277 case AArch64::SUBWrr:
8278 case AArch64::SUBSWrr:
8279 case AArch64::SUBXrr:
8280 case AArch64::SUBSXrr:
8281 // Found candidate root.
8282 break;
8283 default:
8284 return false;
8285 }
8286
 // findRegisterDefOperandIdx returning -1 means no matching NZCV def operand
 // was found on Root (the third argument is passed as true).
8288 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
8289 -1)
8290 return false;
8291
 // The subtrahend (operand 2) must come from one of the register-register
 // ADD forms.
8292 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
8293 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
8294 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
8295 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
8298 return true;
8299 }
8300
8301 return false;
8302}
8303
8304/// Check if the given instruction forms a gather load pattern that can be
8305/// optimized for better Memory-Level Parallelism (MLP). This function
8306/// identifies chains of NEON lane load instructions that load data from
8307/// different memory addresses into individual lanes of a 128-bit vector
8308/// register, then attempts to split the pattern into parallel loads to break
8309/// the serial dependency between instructions.
8310///
8311/// Pattern Matched:
8312/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
8313/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
8314///
8315/// Transformed Into:
8316/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
8317/// to combine the results, enabling better memory-level parallelism.
8318///
8319/// Supported Element Types:
8320/// - 32-bit elements (LD1i32, 4 lanes total)
8321/// - 16-bit elements (LD1i16, 8 lanes total)
8322/// - 8-bit elements (LD1i8, 16 lanes total)
///
/// \param LoadLaneOpCode opcode every lane load in the chain must have.
/// \param NumLanes number of lanes in the vector; must be 4, 8 or 16.
/// \returns true when the whole chain matched and no load-fold barrier was
/// seen while walking back from \p Root.
8324 SmallVectorImpl<unsigned> &Patterns,
8325 unsigned LoadLaneOpCode, unsigned NumLanes) {
8326 const MachineFunction *MF = Root.getMF();
8327
8328 // Early exit if optimizing for size.
8329 if (MF->getFunction().hasMinSize())
8330 return false;
8331
8332 const MachineRegisterInfo &MRI = MF->getRegInfo();
8334
8335 // The root of the pattern must load into the last lane of the vector.
8336 if (Root.getOperand(2).getImm() != NumLanes - 1)
8337 return false;
8338
8339 // Check that we have load into all lanes except lane 0.
8340 // For each load we also want to check that:
8341 // 1. It has a single non-debug use (since we will be replacing the virtual
8342 // register)
8343 // 2. That the addressing mode only uses a single pointer operand
8344 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
 // llvm::seq is used as a half-open range here: lanes 1 .. NumLanes-2. Lane
 // NumLanes-1 is Root itself and lane 0 is handled separately below.
8345 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
8346 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
 // Follow operand 1 (the incoming vector) from definition to definition,
 // ticking off the lane each load writes.
8348 while (!RemainingLanes.empty() && CurrInstr &&
8349 CurrInstr->getOpcode() == LoadLaneOpCode &&
8350 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
8351 CurrInstr->getNumOperands() == 4) {
8352 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
8353 LoadInstrs.push_back(CurrInstr);
8354 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
8355 }
8356
8357 // Check that we have found a match for lanes N-1.. 1.
8358 if (!RemainingLanes.empty())
8359 return false;
8360
8361 // Match the SUBREG_TO_REG sequence.
8362 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
8363 return false;
8364
8365 // Verify that the subreg to reg loads an integer into the first lane.
8366 auto Lane0LoadReg = CurrInstr->getOperand(1).getReg();
8367 unsigned SingleLaneSizeInBits = 128 / NumLanes;
8368 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
8369 return false;
8370
8371 // Verify that it also has a single non debug use.
8372 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
8373 return false;
8374
8375 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
8376
8377 // If there is any chance of aliasing, do not apply the pattern.
8378 // Walk backward through the MBB starting from Root.
8379 // Exit early if we've encountered all load instructions or hit the search
8380 // limit.
8381 auto MBBItr = Root.getIterator();
8382 unsigned RemainingSteps = GatherOptSearchLimit;
8383 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
8384 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
8385 const MachineBasicBlock *MBB = Root.getParent();
8386
 // The loop stops before visiting the first instruction of the block.
8387 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
8388 !RemainingLoadInstrs.empty();
8389 --MBBItr, --RemainingSteps) {
8390 const MachineInstr &CurrInstr = *MBBItr;
8391
8392 // Remove this instruction from remaining loads if it's one we're tracking.
8393 RemainingLoadInstrs.erase(&CurrInstr);
8394
8395 // Check for potential aliasing with any of the load instructions to
8396 // optimize.
8397 if (CurrInstr.isLoadFoldBarrier())
8398 return false;
8399 }
8400
8401 // If we hit the search limit without finding all load instructions,
8402 // don't match the pattern.
8403 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
8404 return false;
8405
 // Any lane count other than 4, 8 or 16 is a programming error.
8406 switch (NumLanes) {
8407 case 4:
8409 break;
8410 case 8:
8412 break;
8413 case 16:
8415 break;
8416 default:
8417 llvm_unreachable("Got bad number of lanes for gather pattern.");
8418 }
8419
8420 return true;
8421}
8422
8423/// Search for patterns of LD instructions we can optimize.
///
/// Dispatches on the opcode of \p Root: LD1i32, LD1i16 and LD1i8 are forwarded
/// to getGatherLanePattern with 4, 8 and 16 lanes respectively; every other
/// opcode yields false.
8425 SmallVectorImpl<unsigned> &Patterns) {
8426
8427 // The pattern searches for loads into single lanes.
8428 switch (Root.getOpcode()) {
8429 case AArch64::LD1i32:
8430 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
8431 case AArch64::LD1i16:
8432 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
8433 case AArch64::LD1i8:
8434 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
8435 default:
8436 return false;
8437 }
8438}
8439
8440/// Generate optimized instruction sequence for gather load patterns to improve
8441/// Memory-Level Parallelism (MLP). This function transforms a chain of
8442/// sequential NEON lane loads into parallel vector loads that can execute
8443/// concurrently.
///
/// The lower half of the lanes keeps the existing lane-0 SUBREG_TO_REG as its
/// base; the upper half gets a fresh scalar load plus SUBREG_TO_REG. Both
/// halves are then filled with lane loads and joined by a ZIP1v2i64 that
/// defines Root's original result register.
/// \param NumLanes number of lanes in the vector; 4, 8 and 16 are handled.
8444static void
8448 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8449 unsigned Pattern, unsigned NumLanes) {
8450 MachineFunction &MF = *Root.getParent()->getParent();
8451 MachineRegisterInfo &MRI = MF.getRegInfo();
8453
8454 // Gather the initial load instructions to build the pattern.
8455 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
8456 MachineInstr *CurrInstr = &Root;
 // Collects Root plus the NumLanes-2 lane loads that feed it; afterwards
 // CurrInstr is the definition reached after the last collected load.
8457 for (unsigned i = 0; i < NumLanes - 1; ++i) {
8458 LoadToLaneInstrs.push_back(CurrInstr);
8459 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
8460 }
8461
8462 // Sort the load instructions according to the lane.
 // Descending by lane, so that appending the lane-0 load below and reversing
 // the whole vector gives ascending lane order.
8463 llvm::sort(LoadToLaneInstrs,
8464 [](const MachineInstr *A, const MachineInstr *B) {
8465 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
8466 });
8467
8468 MachineInstr *SubregToReg = CurrInstr;
8469 LoadToLaneInstrs.push_back(
8470 MRI.getUniqueVRegDef(SubregToReg->getOperand(1).getReg()));
8471 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
8472
8473 const TargetRegisterClass *FPR128RegClass =
8474 MRI.getRegClass(Root.getOperand(0).getReg());
8475
8476 // Helper lambda to create a LD1 instruction.
 // The new instruction reuses Root's opcode and the original load's memory
 // operands; its index in InsInstrs is recorded for the new register.
8477 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
8478 Register SrcRegister, unsigned Lane,
8479 Register OffsetRegister,
8480 bool OffsetRegisterKillState) {
8481 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
8482 MachineInstrBuilder LoadIndexIntoRegister =
8483 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
8484 NewRegister)
8485 .addReg(SrcRegister)
8486 .addImm(Lane)
8487 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState))
8488 .setMemRefs(OriginalInstr->memoperands());
8489 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
8490 InsInstrs.push_back(LoadIndexIntoRegister);
8491 return NewRegister;
8492 };
8493
8494 // Helper to create load instruction based on the NumLanes in the NEON
8495 // register we are rewriting.
8496 auto CreateLDRInstruction =
8497 [&](unsigned NumLanes, Register DestReg, Register OffsetReg,
8499 unsigned Opcode;
 // 4 lanes -> 32-bit load, 8 lanes -> 16-bit load, 16 lanes -> 8-bit load.
8500 switch (NumLanes) {
8501 case 4:
8502 Opcode = AArch64::LDRSui;
8503 break;
8504 case 8:
8505 Opcode = AArch64::LDRHui;
8506 break;
8507 case 16:
8508 Opcode = AArch64::LDRBui;
8509 break;
8510 default:
8512 "Got unsupported number of lanes in machine-combiner gather pattern");
8513 }
8514 // Immediate offset load
8515 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
8516 .addReg(OffsetReg)
8517 .addImm(0)
8518 .setMemRefs(MMOs);
8519 };
8520
8521 // Load the remaining lanes into register 0.
 // These are lanes 1 .. NumLanes/2-1 of the original vector; they keep their
 // lane numbers (Index + 1) in the first half register.
8522 auto LanesToLoadToReg0 =
8523 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
8524 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
8525 Register PrevReg = SubregToReg->getOperand(0).getReg();
8526 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
8527 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8528 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8529 OffsetRegOperand.getReg(),
8530 OffsetRegOperand.isKill());
8531 DelInstrs.push_back(LoadInstr);
8532 }
8533 Register LastLoadReg0 = PrevReg;
8534
8535 // First load into register 1. Perform an integer load to zero out the upper
8536 // lanes in a single instruction.
8537 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
 // The original load of lane NumLanes/2 becomes lane 0 of the second half.
8538 MachineInstr *OriginalSplitLoad =
8539 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
8540 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
8541 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
8542
8543 const MachineOperand &OriginalSplitToLoadOffsetOperand =
8544 OriginalSplitLoad->getOperand(3);
8545 MachineInstrBuilder MiddleIndexLoadInstr =
8546 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
8547 OriginalSplitToLoadOffsetOperand.getReg(),
8548 OriginalSplitLoad->memoperands());
8549
8550 InstrIdxForVirtReg.insert(
8551 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
8552 InsInstrs.push_back(MiddleIndexLoadInstr);
8553 DelInstrs.push_back(OriginalSplitLoad);
8554
8555 // Subreg To Reg instruction for register 1.
8556 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
8557 unsigned SubregType;
 // Sub-register index matching the element width chosen above.
8558 switch (NumLanes) {
8559 case 4:
8560 SubregType = AArch64::ssub;
8561 break;
8562 case 8:
8563 SubregType = AArch64::hsub;
8564 break;
8565 case 16:
8566 SubregType = AArch64::bsub;
8567 break;
8568 default:
8570 "Got invalid NumLanes for machine-combiner gather pattern");
8571 }
8572
8573 auto SubRegToRegInstr =
8574 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
8575 DestRegForSubregToReg)
8576 .addReg(DestRegForMiddleIndex, getKillRegState(true))
8577 .addImm(SubregType);
8578 InstrIdxForVirtReg.insert(
8579 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
8580 InsInstrs.push_back(SubRegToRegInstr);
8581
8582 // Load remaining lanes into register 1.
 // Original lanes NumLanes/2+1 .. NumLanes-1 are renumbered from 1 upwards
 // in the second half register.
8583 auto LanesToLoadToReg1 =
8584 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
8585 LoadToLaneInstrsAscending.end());
8586 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
8587 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
8588 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8589 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8590 OffsetRegOperand.getReg(),
8591 OffsetRegOperand.isKill());
8592
8593 // Do not add the last reg to DelInstrs - it will be removed later.
8594 if (Index == NumLanes / 2 - 2) {
8595 break;
8596 }
8597 DelInstrs.push_back(LoadInstr);
8598 }
8599 Register LastLoadReg1 = PrevReg;
8600
8601 // Create the final zip instruction to combine the results.
 // It defines Root's original destination register.
8602 MachineInstrBuilder ZipInstr =
8603 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
8604 Root.getOperand(0).getReg())
8605 .addReg(LastLoadReg0)
8606 .addReg(LastLoadReg1);
8607 InsInstrs.push_back(ZipInstr);
8608}
8609
8623
8624/// Return true when there is potentially a faster code sequence for an
8625/// instruction chain ending in \p Root. All potential patterns are listed in
8626/// the \p Pattern vector. Pattern should be sorted in priority order since the
8627/// pattern evaluator stops checking as soon as it finds a faster sequence.
8628
8629bool AArch64InstrInfo::getMachineCombinerPatterns(
8630 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
8631 bool DoRegPressureReduce) const {
8632 // Integer patterns
8633 if (getMaddPatterns(Root, Patterns))
8634 return true;
8635 // Floating point patterns
8636 if (getFMULPatterns(Root, Patterns))
8637 return true;
8638 if (getFMAPatterns(Root, Patterns))
8639 return true;
8640 if (getFNEGPatterns(Root, Patterns))
8641 return true;
8642
8643 // Other patterns
8644 if (getMiscPatterns(Root, Patterns))
8645 return true;
8646
8647 // Load patterns
8648 if (getLoadPatterns(Root, Patterns))
8649 return true;
8650
8651 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
8652 DoRegPressureReduce);
8653}
8654
8656/// genFusedMultiply - Generate fused multiply instructions.
8657/// This function supports both integer and floating point instructions.
8658/// A typical example:
8659/// F|MUL I=A,B,0
8660/// F|ADD R,I,C
8661/// ==> F|MADD R,A,B,C
8662/// \param MF Containing MachineFunction
8663/// \param MRI Register information
8664/// \param TII Target information
8665/// \param Root is the F|ADD instruction
8666/// \param [out] InsInstrs is a vector of machine instructions and will
8667/// contain the generated madd instruction
8668/// \param IdxMulOpd is index of operand in Root that is the result of
8669/// the F|MUL. In the example above IdxMulOpd is 1.
8670/// \param MaddOpc the opcode of the f|madd instruction
8671/// \param RC Register class of operands
8672/// \param kind of fma instruction (addressing mode) to be generated
8673/// \param ReplacedAddend is the result register from the instruction
8674/// replacing the non-combined operand, if any.
/// \returns the F|MUL instruction that defines operand \p IdxMulOpd of Root.
8675static MachineInstr *
8677 const TargetInstrInfo *TII, MachineInstr &Root,
8678 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
8679 unsigned MaddOpc, const TargetRegisterClass *RC,
8681 const Register *ReplacedAddend = nullptr) {
8682 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8683
 // The operand of Root that is not the multiply result is the addend.
8684 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
8685 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8686 Register ResultReg = Root.getOperand(0).getReg();
8687 Register SrcReg0 = MUL->getOperand(1).getReg();
8688 bool Src0IsKill = MUL->getOperand(1).isKill();
8689 Register SrcReg1 = MUL->getOperand(2).getReg();
8690 bool Src1IsKill = MUL->getOperand(2).isKill();
8691
8692 Register SrcReg2;
8693 bool Src2IsKill;
8694 if (ReplacedAddend) {
8695 // If we just generated a new addend, we must be its only use.
8696 SrcReg2 = *ReplacedAddend;
8697 Src2IsKill = true;
8698 } else {
8699 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
8700 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
8701 }
8702
 // Constrain every virtual register involved to the register class RC.
8703 if (ResultReg.isVirtual())
8704 MRI.constrainRegClass(ResultReg, RC);
8705 if (SrcReg0.isVirtual())
8706 MRI.constrainRegClass(SrcReg0, RC);
8707 if (SrcReg1.isVirtual())
8708 MRI.constrainRegClass(SrcReg1, RC);
8709 if (SrcReg2.isVirtual())
8710 MRI.constrainRegClass(SrcReg2, RC);
8711
 // Operand order depends on the kind: Default places the addend last, while
 // Indexed and Accumulator place it first. Indexed additionally copies the
 // immediate from operand 3 of the multiply.
8713 if (kind == FMAInstKind::Default)
8714 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8715 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8716 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8717 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8718 else if (kind == FMAInstKind::Indexed)
8719 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8720 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8721 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8722 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8723 .addImm(MUL->getOperand(3).getImm());
8724 else if (kind == FMAInstKind::Accumulator)
8725 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8726 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8727 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8728 .addReg(SrcReg1, getKillRegState(Src1IsKill));
8729 else
8730 assert(false && "Invalid FMA instruction kind \n");
8731 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
8732 InsInstrs.push_back(MIB);
8733 return MUL;
8734}
8735
/// Build a negated fused multiply-add that defines Root's result register
/// from the three source operands of the instruction defining Root's
/// operand 1 (MAD).
///
/// FNMADDSrrr is used when MAD's result register class is within FPR32 and
/// FNMADDDrrr when it is within FPR64; for any other class nothing is
/// generated and nullptr is returned.
/// \returns MAD on success, nullptr otherwise.
8736static MachineInstr *
8738 const TargetInstrInfo *TII, MachineInstr &Root,
8740 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8741
8742 unsigned Opc = 0;
8743 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8744 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8745 Opc = AArch64::FNMADDSrrr;
8746 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8747 Opc = AArch64::FNMADDDrrr;
8748 else
8749 return nullptr;
8750
8751 Register ResultReg = Root.getOperand(0).getReg();
8752 Register SrcReg0 = MAD->getOperand(1).getReg();
8753 Register SrcReg1 = MAD->getOperand(2).getReg();
8754 Register SrcReg2 = MAD->getOperand(3).getReg();
 // Kill flags are carried over from MAD's operands to the new instruction.
8755 bool Src0IsKill = MAD->getOperand(1).isKill();
8756 bool Src1IsKill = MAD->getOperand(2).isKill();
8757 bool Src2IsKill = MAD->getOperand(3).isKill();
8758 if (ResultReg.isVirtual())
8759 MRI.constrainRegClass(ResultReg, RC);
8760 if (SrcReg0.isVirtual())
8761 MRI.constrainRegClass(SrcReg0, RC);
8762 if (SrcReg1.isVirtual())
8763 MRI.constrainRegClass(SrcReg1, RC);
8764 if (SrcReg2.isVirtual())
8765 MRI.constrainRegClass(SrcReg2, RC);
8766
8768 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8769 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8770 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8771 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8772 InsInstrs.push_back(MIB);
8773
8774 return MAD;
8775}
8776
8777/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
///
/// \param IdxDupOp index (1 or 2) of the Root operand that is produced by the
///        DUP, possibly through one COPY.
/// \param MulOpc opcode of the indexed multiply to build.
/// \param RC register class the DUP source register is constrained to.
/// \returns a pointer to Root.
8778static MachineInstr *
8781 unsigned IdxDupOp, unsigned MulOpc,
8782 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
8783 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8784 "Invalid index of FMUL operand");
8785
8786 MachineFunction &MF = *Root.getMF();
8788
8789 MachineInstr *Dup =
8790 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8791
 // Look through a COPY sitting between the DUP and the multiply.
8792 if (Dup->getOpcode() == TargetOpcode::COPY)
8793 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8794
8795 Register DupSrcReg = Dup->getOperand(1).getReg();
 // NOTE(review): kill flags are presumably cleared because the new
 // instruction adds another use of DupSrcReg -- confirm.
8796 MRI.clearKillFlags(DupSrcReg);
8797 MRI.constrainRegClass(DupSrcReg, RC);
8798
8799 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8800
 // The other Root operand is the ordinary multiplicand.
8801 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8802 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8803
8804 Register ResultReg = Root.getOperand(0).getReg();
8805
8807 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8808 .add(MulOp)
8809 .addReg(DupSrcReg)
8810 .addImm(DupSrcLane);
8811
8812 InsInstrs.push_back(MIB);
8813 return &Root;
8814}
8815
8816/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8817/// instructions.
8818///
/// Thin wrapper that forwards all of its arguments to genFusedMultiply.
///
8819/// \see genFusedMultiply
8823 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8824 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8826}
8827
8828/// genNeg - Helper to generate an intermediate negation of the second operand
8829/// of Root
///
/// Creates a new virtual register of class \p RC, builds an \p MnegOpc
/// instruction that defines it from operand 2 of Root, and appends that
/// instruction to InsInstrs.
/// \returns the new virtual register.
8831 const TargetInstrInfo *TII, MachineInstr &Root,
8833 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8834 unsigned MnegOpc, const TargetRegisterClass *RC) {
8835 Register NewVR = MRI.createVirtualRegister(RC);
8837 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8838 .add(Root.getOperand(2));
8839 InsInstrs.push_back(MIB);
8840
 // The map must still be empty here; NewVR is recorded with index 0.
8841 assert(InstrIdxForVirtReg.empty());
8842 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8843
8844 return NewVR;
8845}
8846
8847/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8848/// instructions with an additional negation of the accumulator
///
/// First emits the negation through genNeg, then calls genFusedMultiply with
/// FMAInstKind::Accumulator and the negated register as the replaced addend.
/// Only \p IdxMulOpd == 1 is supported (asserted).
8852 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8853 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8854 assert(IdxMulOpd == 1);
8855
8856 Register NewVR =
8857 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8858 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8859 FMAInstKind::Accumulator, &NewVR);
8860}
8861
8862/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8863/// instructions.
8864///
/// Thin wrapper that forwards all of its arguments to genFusedMultiply.
///
8865/// \see genFusedMultiply
8869 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8870 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8872}
8873
8874/// genFusedMultiplyIdxNeg - Helper to generate indexed fused multiply
8875/// accumulate instructions with an additional negation of the accumulator
///
/// First emits the negation through genNeg, then calls genFusedMultiply with
/// FMAInstKind::Indexed and the negated register as the replaced addend.
/// Only \p IdxMulOpd == 1 is supported (asserted).
8879 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8880 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8881 assert(IdxMulOpd == 1);
8882
8883 Register NewVR =
8884 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8885
8886 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8887 FMAInstKind::Indexed, &NewVR);
8888}
8889
8890/// genMaddR - Generate madd instruction and combine mul and add using
8891/// an extra virtual register
8892/// Example - an ADD intermediate needs to be stored in a register:
8893/// MUL I=A,B,0
8894/// ADD R,I,Imm
8895/// ==> ORR V, ZR, Imm
8896/// ==> MADD R,A,B,V
8897/// \param MF Containing MachineFunction
8898/// \param MRI Register information
8899/// \param TII Target information
8900/// \param Root is the ADD instruction
8901/// \param [out] InsInstrs is a vector of machine instructions and will
8902/// contain the generated madd instruction
8903/// \param IdxMulOpd is index of operand in Root that is the result of
8904/// the MUL. In the example above IdxMulOpd is 1.
8905/// \param MaddOpc the opcode of the madd instruction
8906/// \param VR is a virtual register that holds the value of an ADD operand
8907/// (V in the example above).
8908/// \param RC Register class of operands
8910 const TargetInstrInfo *TII, MachineInstr &Root,
8912 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8913 const TargetRegisterClass *RC) {
8914 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8915
8916 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8917 Register ResultReg = Root.getOperand(0).getReg();
8918 Register SrcReg0 = MUL->getOperand(1).getReg();
8919 bool Src0IsKill = MUL->getOperand(1).isKill();
8920 Register SrcReg1 = MUL->getOperand(2).getReg();
8921 bool Src1IsKill = MUL->getOperand(2).isKill();
8922
8923 if (ResultReg.isVirtual())
8924 MRI.constrainRegClass(ResultReg, RC);
8925 if (SrcReg0.isVirtual())
8926 MRI.constrainRegClass(SrcReg0, RC);
8927 if (SrcReg1.isVirtual())
8928 MRI.constrainRegClass(SrcReg1, RC);
8930 MRI.constrainRegClass(VR, RC);
8931
8933 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8934 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8935 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8936 .addReg(VR);
8937 // Insert the MADD
8938 InsInstrs.push_back(MIB);
8939 return MUL;
8940}
8941
8942/// Do the following transformation
8943/// A - (B + C) ==> (A - B) - C
8944/// A - (B + C) ==> (A - C) - B
8946 const TargetInstrInfo *TII, MachineInstr &Root,
8949 unsigned IdxOpd1,
8950 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8951 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8952 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8953 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8954
8955 Register ResultReg = Root.getOperand(0).getReg();
8956 Register RegA = Root.getOperand(1).getReg();
8957 bool RegAIsKill = Root.getOperand(1).isKill();
8958 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8959 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8960 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8961 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8962 Register NewVR =
8964
8965 unsigned Opcode = Root.getOpcode();
8966 if (Opcode == AArch64::SUBSWrr)
8967 Opcode = AArch64::SUBWrr;
8968 else if (Opcode == AArch64::SUBSXrr)
8969 Opcode = AArch64::SUBXrr;
8970 else
8971 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8972 "Unexpected instruction opcode.");
8973
8974 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8975 Flags &= ~MachineInstr::NoSWrap;
8976 Flags &= ~MachineInstr::NoUWrap;
8977
8978 MachineInstrBuilder MIB1 =
8979 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8980 .addReg(RegA, getKillRegState(RegAIsKill))
8981 .addReg(RegB, getKillRegState(RegBIsKill))
8982 .setMIFlags(Flags);
8983 MachineInstrBuilder MIB2 =
8984 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8985 .addReg(NewVR, getKillRegState(true))
8986 .addReg(RegC, getKillRegState(RegCIsKill))
8987 .setMIFlags(Flags);
8988
8989 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8990 InsInstrs.push_back(MIB1);
8991 InsInstrs.push_back(MIB2);
8992 DelInstrs.push_back(AddMI);
8993 DelInstrs.push_back(&Root);
8994}
8995
8996unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8997 unsigned int AccumulatorOpCode) const {
8998 switch (AccumulatorOpCode) {
8999 case AArch64::UABALB_ZZZ_D:
9000 case AArch64::SABALB_ZZZ_D:
9001 case AArch64::UABALT_ZZZ_D:
9002 case AArch64::SABALT_ZZZ_D:
9003 return AArch64::ADD_ZZZ_D;
9004 case AArch64::UABALB_ZZZ_H:
9005 case AArch64::SABALB_ZZZ_H:
9006 case AArch64::UABALT_ZZZ_H:
9007 case AArch64::SABALT_ZZZ_H:
9008 return AArch64::ADD_ZZZ_H;
9009 case AArch64::UABALB_ZZZ_S:
9010 case AArch64::SABALB_ZZZ_S:
9011 case AArch64::UABALT_ZZZ_S:
9012 case AArch64::SABALT_ZZZ_S:
9013 return AArch64::ADD_ZZZ_S;
9014 case AArch64::UABALv16i8_v8i16:
9015 case AArch64::SABALv8i8_v8i16:
9016 case AArch64::SABAv8i16:
9017 case AArch64::UABAv8i16:
9018 return AArch64::ADDv8i16;
9019 case AArch64::SABALv2i32_v2i64:
9020 case AArch64::UABALv2i32_v2i64:
9021 case AArch64::SABALv4i32_v2i64:
9022 return AArch64::ADDv2i64;
9023 case AArch64::UABALv4i16_v4i32:
9024 case AArch64::SABALv4i16_v4i32:
9025 case AArch64::SABALv8i16_v4i32:
9026 case AArch64::SABAv4i32:
9027 case AArch64::UABAv4i32:
9028 return AArch64::ADDv4i32;
9029 case AArch64::UABALv4i32_v2i64:
9030 return AArch64::ADDv2i64;
9031 case AArch64::UABALv8i16_v4i32:
9032 return AArch64::ADDv4i32;
9033 case AArch64::UABALv8i8_v8i16:
9034 case AArch64::SABALv16i8_v8i16:
9035 return AArch64::ADDv8i16;
9036 case AArch64::UABAv16i8:
9037 case AArch64::SABAv16i8:
9038 return AArch64::ADDv16i8;
9039 case AArch64::UABAv4i16:
9040 case AArch64::SABAv4i16:
9041 return AArch64::ADDv4i16;
9042 case AArch64::UABAv2i32:
9043 case AArch64::SABAv2i32:
9044 return AArch64::ADDv2i32;
9045 case AArch64::UABAv8i8:
9046 case AArch64::SABAv8i8:
9047 return AArch64::ADDv8i8;
9048 default:
9049 llvm_unreachable("Unknown accumulator opcode");
9050 }
9051}
9052
9053/// When getMachineCombinerPatterns() finds potential patterns,
9054/// this function generates the instructions that could replace the
9055/// original code sequence
9056void AArch64InstrInfo::genAlternativeCodeSequence(
9057 MachineInstr &Root, unsigned Pattern,
9060 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
9061 MachineBasicBlock &MBB = *Root.getParent();
9062 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
9063 MachineFunction &MF = *MBB.getParent();
9064 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
9065
9066 MachineInstr *MUL = nullptr;
9067 const TargetRegisterClass *RC;
9068 unsigned Opc;
9069 switch (Pattern) {
9070 default:
9071 // Reassociate instructions.
9072 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
9073 DelInstrs, InstrIdxForVirtReg);
9074 return;
9076 // A - (B + C)
9077 // ==> (A - B) - C
9078 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
9079 InstrIdxForVirtReg);
9080 return;
9082 // A - (B + C)
9083 // ==> (A - C) - B
9084 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
9085 InstrIdxForVirtReg);
9086 return;
9089 // MUL I=A,B,0
9090 // ADD R,I,C
9091 // ==> MADD R,A,B,C
9092 // --- Create(MADD);
9094 Opc = AArch64::MADDWrrr;
9095 RC = &AArch64::GPR32RegClass;
9096 } else {
9097 Opc = AArch64::MADDXrrr;
9098 RC = &AArch64::GPR64RegClass;
9099 }
9100 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9101 break;
9104 // MUL I=A,B,0
9105 // ADD R,C,I
9106 // ==> MADD R,A,B,C
9107 // --- Create(MADD);
9109 Opc = AArch64::MADDWrrr;
9110 RC = &AArch64::GPR32RegClass;
9111 } else {
9112 Opc = AArch64::MADDXrrr;
9113 RC = &AArch64::GPR64RegClass;
9114 }
9115 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9116 break;
9121 // MUL I=A,B,0
9122 // ADD/SUB R,I,Imm
9123 // ==> MOV V, Imm/-Imm
9124 // ==> MADD R,A,B,V
9125 // --- Create(MADD);
9126 const TargetRegisterClass *RC;
9127 unsigned BitSize, MovImm;
9130 MovImm = AArch64::MOVi32imm;
9131 RC = &AArch64::GPR32spRegClass;
9132 BitSize = 32;
9133 Opc = AArch64::MADDWrrr;
9134 RC = &AArch64::GPR32RegClass;
9135 } else {
9136 MovImm = AArch64::MOVi64imm;
9137 RC = &AArch64::GPR64spRegClass;
9138 BitSize = 64;
9139 Opc = AArch64::MADDXrrr;
9140 RC = &AArch64::GPR64RegClass;
9141 }
9142 Register NewVR = MRI.createVirtualRegister(RC);
9143 uint64_t Imm = Root.getOperand(2).getImm();
9144
9145 if (Root.getOperand(3).isImm()) {
9146 unsigned Val = Root.getOperand(3).getImm();
9147 Imm = Imm << Val;
9148 }
9149 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
9151 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
9152 // Check that the immediate can be composed via a single instruction.
9154 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
9155 if (Insn.size() != 1)
9156 return;
9157 MachineInstrBuilder MIB1 =
9158 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
9159 .addImm(IsSub ? -Imm : Imm);
9160 InsInstrs.push_back(MIB1);
9161 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9162 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
9163 break;
9164 }
9167 // MUL I=A,B,0
9168 // SUB R,I, C
9169 // ==> SUB V, 0, C
9170 // ==> MADD R,A,B,V // = -C + A*B
9171 // --- Create(MADD);
9172 const TargetRegisterClass *SubRC;
9173 unsigned SubOpc, ZeroReg;
9175 SubOpc = AArch64::SUBWrr;
9176 SubRC = &AArch64::GPR32spRegClass;
9177 ZeroReg = AArch64::WZR;
9178 Opc = AArch64::MADDWrrr;
9179 RC = &AArch64::GPR32RegClass;
9180 } else {
9181 SubOpc = AArch64::SUBXrr;
9182 SubRC = &AArch64::GPR64spRegClass;
9183 ZeroReg = AArch64::XZR;
9184 Opc = AArch64::MADDXrrr;
9185 RC = &AArch64::GPR64RegClass;
9186 }
9187 Register NewVR = MRI.createVirtualRegister(SubRC);
9188 // SUB NewVR, 0, C
9189 MachineInstrBuilder MIB1 =
9190 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
9191 .addReg(ZeroReg)
9192 .add(Root.getOperand(2));
9193 InsInstrs.push_back(MIB1);
9194 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9195 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
9196 break;
9197 }
9200 // MUL I=A,B,0
9201 // SUB R,C,I
9202 // ==> MSUB R,A,B,C (computes C - A*B)
9203 // --- Create(MSUB);
9205 Opc = AArch64::MSUBWrrr;
9206 RC = &AArch64::GPR32RegClass;
9207 } else {
9208 Opc = AArch64::MSUBXrrr;
9209 RC = &AArch64::GPR64RegClass;
9210 }
9211 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9212 break;
9214 Opc = AArch64::MLAv8i8;
9215 RC = &AArch64::FPR64RegClass;
9216 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9217 break;
9219 Opc = AArch64::MLAv8i8;
9220 RC = &AArch64::FPR64RegClass;
9221 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9222 break;
9224 Opc = AArch64::MLAv16i8;
9225 RC = &AArch64::FPR128RegClass;
9226 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9227 break;
9229 Opc = AArch64::MLAv16i8;
9230 RC = &AArch64::FPR128RegClass;
9231 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9232 break;
9234 Opc = AArch64::MLAv4i16;
9235 RC = &AArch64::FPR64RegClass;
9236 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9237 break;
9239 Opc = AArch64::MLAv4i16;
9240 RC = &AArch64::FPR64RegClass;
9241 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9242 break;
9244 Opc = AArch64::MLAv8i16;
9245 RC = &AArch64::FPR128RegClass;
9246 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9247 break;
9249 Opc = AArch64::MLAv8i16;
9250 RC = &AArch64::FPR128RegClass;
9251 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9252 break;
9254 Opc = AArch64::MLAv2i32;
9255 RC = &AArch64::FPR64RegClass;
9256 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9257 break;
9259 Opc = AArch64::MLAv2i32;
9260 RC = &AArch64::FPR64RegClass;
9261 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9262 break;
9264 Opc = AArch64::MLAv4i32;
9265 RC = &AArch64::FPR128RegClass;
9266 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9267 break;
9269 Opc = AArch64::MLAv4i32;
9270 RC = &AArch64::FPR128RegClass;
9271 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9272 break;
9273
9275 Opc = AArch64::MLAv8i8;
9276 RC = &AArch64::FPR64RegClass;
9277 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9278 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
9279 RC);
9280 break;
9282 Opc = AArch64::MLSv8i8;
9283 RC = &AArch64::FPR64RegClass;
9284 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9285 break;
9287 Opc = AArch64::MLAv16i8;
9288 RC = &AArch64::FPR128RegClass;
9289 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9290 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
9291 RC);
9292 break;
9294 Opc = AArch64::MLSv16i8;
9295 RC = &AArch64::FPR128RegClass;
9296 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9297 break;
9299 Opc = AArch64::MLAv4i16;
9300 RC = &AArch64::FPR64RegClass;
9301 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9302 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
9303 RC);
9304 break;
9306 Opc = AArch64::MLSv4i16;
9307 RC = &AArch64::FPR64RegClass;
9308 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9309 break;
9311 Opc = AArch64::MLAv8i16;
9312 RC = &AArch64::FPR128RegClass;
9313 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9314 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
9315 RC);
9316 break;
9318 Opc = AArch64::MLSv8i16;
9319 RC = &AArch64::FPR128RegClass;
9320 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9321 break;
9323 Opc = AArch64::MLAv2i32;
9324 RC = &AArch64::FPR64RegClass;
9325 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9326 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
9327 RC);
9328 break;
9330 Opc = AArch64::MLSv2i32;
9331 RC = &AArch64::FPR64RegClass;
9332 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9333 break;
9335 Opc = AArch64::MLAv4i32;
9336 RC = &AArch64::FPR128RegClass;
9337 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9338 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
9339 RC);
9340 break;
9342 Opc = AArch64::MLSv4i32;
9343 RC = &AArch64::FPR128RegClass;
9344 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9345 break;
9346
9348 Opc = AArch64::MLAv4i16_indexed;
9349 RC = &AArch64::FPR64RegClass;
9350 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9351 break;
9353 Opc = AArch64::MLAv4i16_indexed;
9354 RC = &AArch64::FPR64RegClass;
9355 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9356 break;
9358 Opc = AArch64::MLAv8i16_indexed;
9359 RC = &AArch64::FPR128RegClass;
9360 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9361 break;
9363 Opc = AArch64::MLAv8i16_indexed;
9364 RC = &AArch64::FPR128RegClass;
9365 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9366 break;
9368 Opc = AArch64::MLAv2i32_indexed;
9369 RC = &AArch64::FPR64RegClass;
9370 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9371 break;
9373 Opc = AArch64::MLAv2i32_indexed;
9374 RC = &AArch64::FPR64RegClass;
9375 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9376 break;
9378 Opc = AArch64::MLAv4i32_indexed;
9379 RC = &AArch64::FPR128RegClass;
9380 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9381 break;
9383 Opc = AArch64::MLAv4i32_indexed;
9384 RC = &AArch64::FPR128RegClass;
9385 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9386 break;
9387
9389 Opc = AArch64::MLAv4i16_indexed;
9390 RC = &AArch64::FPR64RegClass;
9391 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9392 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
9393 RC);
9394 break;
9396 Opc = AArch64::MLSv4i16_indexed;
9397 RC = &AArch64::FPR64RegClass;
9398 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9399 break;
9401 Opc = AArch64::MLAv8i16_indexed;
9402 RC = &AArch64::FPR128RegClass;
9403 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9404 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
9405 RC);
9406 break;
9408 Opc = AArch64::MLSv8i16_indexed;
9409 RC = &AArch64::FPR128RegClass;
9410 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9411 break;
9413 Opc = AArch64::MLAv2i32_indexed;
9414 RC = &AArch64::FPR64RegClass;
9415 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9416 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
9417 RC);
9418 break;
9420 Opc = AArch64::MLSv2i32_indexed;
9421 RC = &AArch64::FPR64RegClass;
9422 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9423 break;
9425 Opc = AArch64::MLAv4i32_indexed;
9426 RC = &AArch64::FPR128RegClass;
9427 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9428 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
9429 RC);
9430 break;
9432 Opc = AArch64::MLSv4i32_indexed;
9433 RC = &AArch64::FPR128RegClass;
9434 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9435 break;
9436
9437 // Floating Point Support
9439 Opc = AArch64::FMADDHrrr;
9440 RC = &AArch64::FPR16RegClass;
9441 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9442 break;
9444 Opc = AArch64::FMADDSrrr;
9445 RC = &AArch64::FPR32RegClass;
9446 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9447 break;
9449 Opc = AArch64::FMADDDrrr;
9450 RC = &AArch64::FPR64RegClass;
9451 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9452 break;
9453
9455 Opc = AArch64::FMADDHrrr;
9456 RC = &AArch64::FPR16RegClass;
9457 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9458 break;
9460 Opc = AArch64::FMADDSrrr;
9461 RC = &AArch64::FPR32RegClass;
9462 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9463 break;
9465 Opc = AArch64::FMADDDrrr;
9466 RC = &AArch64::FPR64RegClass;
9467 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9468 break;
9469
9471 Opc = AArch64::FMLAv1i32_indexed;
9472 RC = &AArch64::FPR32RegClass;
9473 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9475 break;
9477 Opc = AArch64::FMLAv1i32_indexed;
9478 RC = &AArch64::FPR32RegClass;
9479 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9481 break;
9482
9484 Opc = AArch64::FMLAv1i64_indexed;
9485 RC = &AArch64::FPR64RegClass;
9486 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9488 break;
9490 Opc = AArch64::FMLAv1i64_indexed;
9491 RC = &AArch64::FPR64RegClass;
9492 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9494 break;
9495
9497 RC = &AArch64::FPR64RegClass;
9498 Opc = AArch64::FMLAv4i16_indexed;
9499 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9501 break;
9503 RC = &AArch64::FPR64RegClass;
9504 Opc = AArch64::FMLAv4f16;
9505 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9507 break;
9509 RC = &AArch64::FPR64RegClass;
9510 Opc = AArch64::FMLAv4i16_indexed;
9511 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9513 break;
9515 RC = &AArch64::FPR64RegClass;
9516 Opc = AArch64::FMLAv4f16;
9517 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9519 break;
9520
9523 RC = &AArch64::FPR64RegClass;
9525 Opc = AArch64::FMLAv2i32_indexed;
9526 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9528 } else {
9529 Opc = AArch64::FMLAv2f32;
9530 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9532 }
9533 break;
9536 RC = &AArch64::FPR64RegClass;
9538 Opc = AArch64::FMLAv2i32_indexed;
9539 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9541 } else {
9542 Opc = AArch64::FMLAv2f32;
9543 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9545 }
9546 break;
9547
9549 RC = &AArch64::FPR128RegClass;
9550 Opc = AArch64::FMLAv8i16_indexed;
9551 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9553 break;
9555 RC = &AArch64::FPR128RegClass;
9556 Opc = AArch64::FMLAv8f16;
9557 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9559 break;
9561 RC = &AArch64::FPR128RegClass;
9562 Opc = AArch64::FMLAv8i16_indexed;
9563 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9565 break;
9567 RC = &AArch64::FPR128RegClass;
9568 Opc = AArch64::FMLAv8f16;
9569 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9571 break;
9572
9575 RC = &AArch64::FPR128RegClass;
9577 Opc = AArch64::FMLAv2i64_indexed;
9578 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9580 } else {
9581 Opc = AArch64::FMLAv2f64;
9582 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9584 }
9585 break;
9588 RC = &AArch64::FPR128RegClass;
9590 Opc = AArch64::FMLAv2i64_indexed;
9591 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9593 } else {
9594 Opc = AArch64::FMLAv2f64;
9595 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9597 }
9598 break;
9599
9602 RC = &AArch64::FPR128RegClass;
9604 Opc = AArch64::FMLAv4i32_indexed;
9605 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9607 } else {
9608 Opc = AArch64::FMLAv4f32;
9609 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9611 }
9612 break;
9613
9616 RC = &AArch64::FPR128RegClass;
9618 Opc = AArch64::FMLAv4i32_indexed;
9619 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9621 } else {
9622 Opc = AArch64::FMLAv4f32;
9623 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9625 }
9626 break;
9627
9629 Opc = AArch64::FNMSUBHrrr;
9630 RC = &AArch64::FPR16RegClass;
9631 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9632 break;
9634 Opc = AArch64::FNMSUBSrrr;
9635 RC = &AArch64::FPR32RegClass;
9636 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9637 break;
9639 Opc = AArch64::FNMSUBDrrr;
9640 RC = &AArch64::FPR64RegClass;
9641 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9642 break;
9643
9645 Opc = AArch64::FNMADDHrrr;
9646 RC = &AArch64::FPR16RegClass;
9647 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9648 break;
9650 Opc = AArch64::FNMADDSrrr;
9651 RC = &AArch64::FPR32RegClass;
9652 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9653 break;
9655 Opc = AArch64::FNMADDDrrr;
9656 RC = &AArch64::FPR64RegClass;
9657 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9658 break;
9659
9661 Opc = AArch64::FMSUBHrrr;
9662 RC = &AArch64::FPR16RegClass;
9663 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9664 break;
9666 Opc = AArch64::FMSUBSrrr;
9667 RC = &AArch64::FPR32RegClass;
9668 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9669 break;
9671 Opc = AArch64::FMSUBDrrr;
9672 RC = &AArch64::FPR64RegClass;
9673 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9674 break;
9675
9677 Opc = AArch64::FMLSv1i32_indexed;
9678 RC = &AArch64::FPR32RegClass;
9679 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9681 break;
9682
9684 Opc = AArch64::FMLSv1i64_indexed;
9685 RC = &AArch64::FPR64RegClass;
9686 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9688 break;
9689
9692 RC = &AArch64::FPR64RegClass;
9693 Register NewVR = MRI.createVirtualRegister(RC);
9694 MachineInstrBuilder MIB1 =
9695 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
9696 .add(Root.getOperand(2));
9697 InsInstrs.push_back(MIB1);
9698 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9700 Opc = AArch64::FMLAv4f16;
9701 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9702 FMAInstKind::Accumulator, &NewVR);
9703 } else {
9704 Opc = AArch64::FMLAv4i16_indexed;
9705 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9706 FMAInstKind::Indexed, &NewVR);
9707 }
9708 break;
9709 }
9711 RC = &AArch64::FPR64RegClass;
9712 Opc = AArch64::FMLSv4f16;
9713 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9715 break;
9717 RC = &AArch64::FPR64RegClass;
9718 Opc = AArch64::FMLSv4i16_indexed;
9719 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9721 break;
9722
9725 RC = &AArch64::FPR64RegClass;
9727 Opc = AArch64::FMLSv2i32_indexed;
9728 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9730 } else {
9731 Opc = AArch64::FMLSv2f32;
9732 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9734 }
9735 break;
9736
9739 RC = &AArch64::FPR128RegClass;
9740 Register NewVR = MRI.createVirtualRegister(RC);
9741 MachineInstrBuilder MIB1 =
9742 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9743 .add(Root.getOperand(2));
9744 InsInstrs.push_back(MIB1);
9745 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9747 Opc = AArch64::FMLAv8f16;
9748 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9749 FMAInstKind::Accumulator, &NewVR);
9750 } else {
9751 Opc = AArch64::FMLAv8i16_indexed;
9752 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9753 FMAInstKind::Indexed, &NewVR);
9754 }
9755 break;
9756 }
9758 RC = &AArch64::FPR128RegClass;
9759 Opc = AArch64::FMLSv8f16;
9760 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9762 break;
9764 RC = &AArch64::FPR128RegClass;
9765 Opc = AArch64::FMLSv8i16_indexed;
9766 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9768 break;
9769
9772 RC = &AArch64::FPR128RegClass;
9774 Opc = AArch64::FMLSv2i64_indexed;
9775 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9777 } else {
9778 Opc = AArch64::FMLSv2f64;
9779 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9781 }
9782 break;
9783
9786 RC = &AArch64::FPR128RegClass;
9788 Opc = AArch64::FMLSv4i32_indexed;
9789 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9791 } else {
9792 Opc = AArch64::FMLSv4f32;
9793 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9795 }
9796 break;
9799 RC = &AArch64::FPR64RegClass;
9800 Register NewVR = MRI.createVirtualRegister(RC);
9801 MachineInstrBuilder MIB1 =
9802 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9803 .add(Root.getOperand(2));
9804 InsInstrs.push_back(MIB1);
9805 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9807 Opc = AArch64::FMLAv2i32_indexed;
9808 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9809 FMAInstKind::Indexed, &NewVR);
9810 } else {
9811 Opc = AArch64::FMLAv2f32;
9812 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9813 FMAInstKind::Accumulator, &NewVR);
9814 }
9815 break;
9816 }
9819 RC = &AArch64::FPR128RegClass;
9820 Register NewVR = MRI.createVirtualRegister(RC);
9821 MachineInstrBuilder MIB1 =
9822 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9823 .add(Root.getOperand(2));
9824 InsInstrs.push_back(MIB1);
9825 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9827 Opc = AArch64::FMLAv4i32_indexed;
9828 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9829 FMAInstKind::Indexed, &NewVR);
9830 } else {
9831 Opc = AArch64::FMLAv4f32;
9832 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9833 FMAInstKind::Accumulator, &NewVR);
9834 }
9835 break;
9836 }
9839 RC = &AArch64::FPR128RegClass;
9840 Register NewVR = MRI.createVirtualRegister(RC);
9841 MachineInstrBuilder MIB1 =
9842 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9843 .add(Root.getOperand(2));
9844 InsInstrs.push_back(MIB1);
9845 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9847 Opc = AArch64::FMLAv2i64_indexed;
9848 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9849 FMAInstKind::Indexed, &NewVR);
9850 } else {
9851 Opc = AArch64::FMLAv2f64;
9852 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9853 FMAInstKind::Accumulator, &NewVR);
9854 }
9855 break;
9856 }
9859 unsigned IdxDupOp =
9861 : 2;
9862 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9863 &AArch64::FPR128RegClass, MRI);
9864 break;
9865 }
9868 unsigned IdxDupOp =
9870 : 2;
9871 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9872 &AArch64::FPR128RegClass, MRI);
9873 break;
9874 }
9877 unsigned IdxDupOp =
9879 : 2;
9880 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9881 &AArch64::FPR128_loRegClass, MRI);
9882 break;
9883 }
9886 unsigned IdxDupOp =
9888 : 2;
9889 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9890 &AArch64::FPR128RegClass, MRI);
9891 break;
9892 }
9895 unsigned IdxDupOp =
9897 : 2;
9898 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9899 &AArch64::FPR128_loRegClass, MRI);
9900 break;
9901 }
9903 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9904 break;
9905 }
9907 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9908 Pattern, 4);
9909 break;
9910 }
9912 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9913 Pattern, 8);
9914 break;
9915 }
9917 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9918 Pattern, 16);
9919 break;
9920 }
9921
9922 } // end switch (Pattern)
9923 // Record MUL and ADD/SUB for deletion
9924 if (MUL)
9925 DelInstrs.push_back(MUL);
9926 DelInstrs.push_back(&Root);
9927
9928 // Set the flags on the inserted instructions to be the merged flags of the
9929 // instructions that we have combined.
9930 uint32_t Flags = Root.getFlags();
9931 if (MUL)
9932 Flags = Root.mergeFlagsWith(*MUL);
9933 for (auto *MI : InsInstrs)
9934 MI->setFlags(Flags);
9935}
9936
9937/// Replace csinc-branch sequence by simple conditional branch
9938///
9939/// Examples:
9940/// 1. \code
9941/// csinc w9, wzr, wzr, <condition code>
9942/// tbnz w9, #0, 0x44
9943/// \endcode
9944/// to
9945/// \code
9946/// b.<inverted condition code>
9947/// \endcode
9948///
9949/// 2. \code
9950/// csinc w9, wzr, wzr, <condition code>
9951/// tbz w9, #0, 0x44
9952/// \endcode
9953/// to
9954/// \code
9955/// b.<condition code>
9956/// \endcode
9957///
9958/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9959/// compare's constant operand is power of 2.
9960///
9961/// Examples:
9962/// \code
9963/// and w8, w8, #0x400
9964/// cbnz w8, L1
9965/// \endcode
9966/// to
9967/// \code
9968/// tbnz w8, #10, L1
9969/// \endcode
9970///
9971/// \param MI Conditional Branch
9972/// \return True when the simple conditional branch is generated
9973///
9975 bool IsNegativeBranch = false;
9976 bool IsTestAndBranch = false;
9977 unsigned TargetBBInMI = 0;
9978 switch (MI.getOpcode()) {
9979 default:
9980 llvm_unreachable("Unknown branch instruction?");
9981 case AArch64::Bcc:
9982 case AArch64::CBWPri:
9983 case AArch64::CBXPri:
9984 case AArch64::CBBAssertExt:
9985 case AArch64::CBHAssertExt:
9986 case AArch64::CBWPrr:
9987 case AArch64::CBXPrr:
9988 return false;
9989 case AArch64::CBZW:
9990 case AArch64::CBZX:
9991 TargetBBInMI = 1;
9992 break;
9993 case AArch64::CBNZW:
9994 case AArch64::CBNZX:
9995 TargetBBInMI = 1;
9996 IsNegativeBranch = true;
9997 break;
9998 case AArch64::TBZW:
9999 case AArch64::TBZX:
10000 TargetBBInMI = 2;
10001 IsTestAndBranch = true;
10002 break;
10003 case AArch64::TBNZW:
10004 case AArch64::TBNZX:
10005 TargetBBInMI = 2;
10006 IsNegativeBranch = true;
10007 IsTestAndBranch = true;
10008 break;
10009 }
10010 // So we increment a zero register and test for bits other
10011 // than bit 0? Conservatively bail out in case the verifier
10012 // missed this case.
10013 if (IsTestAndBranch && MI.getOperand(1).getImm())
10014 return false;
10015
10016 // Find Definition.
10017 assert(MI.getParent() && "Incomplete machine instruction\n");
10018 MachineBasicBlock *MBB = MI.getParent();
10019 MachineFunction *MF = MBB->getParent();
10020 MachineRegisterInfo *MRI = &MF->getRegInfo();
10021 Register VReg = MI.getOperand(0).getReg();
10022 if (!VReg.isVirtual())
10023 return false;
10024
10025 MachineInstr *DefMI = MRI->getVRegDef(VReg);
10026
10027 // Look through COPY instructions to find definition.
10028 while (DefMI->isCopy()) {
10029 Register CopyVReg = DefMI->getOperand(1).getReg();
10030 if (!MRI->hasOneNonDBGUse(CopyVReg))
10031 return false;
10032 if (!MRI->hasOneDef(CopyVReg))
10033 return false;
10034 DefMI = MRI->getVRegDef(CopyVReg);
10035 }
10036
10037 switch (DefMI->getOpcode()) {
10038 default:
10039 return false;
10040 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
10041 case AArch64::ANDWri:
10042 case AArch64::ANDXri: {
10043 if (IsTestAndBranch)
10044 return false;
10045 if (DefMI->getParent() != MBB)
10046 return false;
10047 if (!MRI->hasOneNonDBGUse(VReg))
10048 return false;
10049
10050 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
10052 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
10053 if (!isPowerOf2_64(Mask))
10054 return false;
10055
10056 MachineOperand &MO = DefMI->getOperand(1);
10057 Register NewReg = MO.getReg();
10058 if (!NewReg.isVirtual())
10059 return false;
10060
10061 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
10062
10063 MachineBasicBlock &RefToMBB = *MBB;
10064 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
10065 DebugLoc DL = MI.getDebugLoc();
10066 unsigned Imm = Log2_64(Mask);
10067 unsigned Opc = (Imm < 32)
10068 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
10069 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
10070 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
10071 .addReg(NewReg)
10072 .addImm(Imm)
10073 .addMBB(TBB);
10074 // Register lives on to the CBZ now.
10075 MO.setIsKill(false);
10076
10077 // For immediate smaller than 32, we need to use the 32-bit
10078 // variant (W) in all cases. Indeed the 64-bit variant does not
10079 // allow to encode them.
10080 // Therefore, if the input register is 64-bit, we need to take the
10081 // 32-bit sub-part.
10082 if (!Is32Bit && Imm < 32)
10083 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
10084 MI.eraseFromParent();
10085 return true;
10086 }
10087 // Look for CSINC
10088 case AArch64::CSINCWr:
10089 case AArch64::CSINCXr: {
10090 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
10091 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
10092 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
10093 DefMI->getOperand(2).getReg() == AArch64::XZR))
10094 return false;
10095
10096 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
10097 true) != -1)
10098 return false;
10099
10100 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
10101 // Convert only when the condition code is not modified between
10102 // the CSINC and the branch. The CC may be used by other
10103 // instructions in between.
10105 return false;
10106 MachineBasicBlock &RefToMBB = *MBB;
10107 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
10108 DebugLoc DL = MI.getDebugLoc();
10109 if (IsNegativeBranch)
10111 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
10112 MI.eraseFromParent();
10113 return true;
10114 }
10115 }
10116}
10117
10118std::pair<unsigned, unsigned>
10119AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
10120 const unsigned Mask = AArch64II::MO_FRAGMENT;
10121 return std::make_pair(TF & Mask, TF & ~Mask);
10122}
10123
10125AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
10126 using namespace AArch64II;
10127
10128 static const std::pair<unsigned, const char *> TargetFlags[] = {
10129 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
10130 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
10131 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
10132 {MO_HI12, "aarch64-hi12"}};
10133 return ArrayRef(TargetFlags);
10134}
10135
10137AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
10138 using namespace AArch64II;
10139
10140 static const std::pair<unsigned, const char *> TargetFlags[] = {
10141 {MO_COFFSTUB, "aarch64-coffstub"},
10142 {MO_GOT, "aarch64-got"},
10143 {MO_NC, "aarch64-nc"},
10144 {MO_S, "aarch64-s"},
10145 {MO_TLS, "aarch64-tls"},
10146 {MO_DLLIMPORT, "aarch64-dllimport"},
10147 {MO_PREL, "aarch64-prel"},
10148 {MO_TAGGED, "aarch64-tagged"},
10149 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
10150 };
10151 return ArrayRef(TargetFlags);
10152}
10153
10155AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
10156 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
10157 {{MOSuppressPair, "aarch64-suppress-pair"},
10158 {MOStridedAccess, "aarch64-strided-access"}};
10159 return ArrayRef(TargetFlags);
10160}
10161
10162/// Constants defining how certain sequences should be outlined.
10163/// This encompasses how an outlined function should be called, and what kind of
10164/// frame should be emitted for that outlined function.
10165///
10166/// \p MachineOutlinerDefault implies that the function should be called with
10167/// a save and restore of LR to the stack.
10168///
10169/// That is,
10170///
10171/// I1 Save LR OUTLINED_FUNCTION:
10172/// I2 --> BL OUTLINED_FUNCTION I1
10173/// I3 Restore LR I2
10174/// I3
10175/// RET
10176///
10177/// * Call construction overhead: 3 (save + BL + restore)
10178/// * Frame construction overhead: 1 (ret)
10179/// * Requires stack fixups? Yes
10180///
10181/// \p MachineOutlinerTailCall implies that the function is being created from
10182/// a sequence of instructions ending in a return.
10183///
10184/// That is,
10185///
10186/// I1 OUTLINED_FUNCTION:
10187/// I2 --> B OUTLINED_FUNCTION I1
10188/// RET I2
10189/// RET
10190///
10191/// * Call construction overhead: 1 (B)
10192/// * Frame construction overhead: 0 (Return included in sequence)
10193/// * Requires stack fixups? No
10194///
10195/// \p MachineOutlinerNoLRSave implies that the function should be called using
10196/// a BL instruction, but doesn't require LR to be saved and restored. This
10197/// happens when LR is known to be dead.
10198///
10199/// That is,
10200///
10201/// I1 OUTLINED_FUNCTION:
10202/// I2 --> BL OUTLINED_FUNCTION I1
10203/// I3 I2
10204/// I3
10205/// RET
10206///
10207/// * Call construction overhead: 1 (BL)
10208/// * Frame construction overhead: 1 (RET)
10209/// * Requires stack fixups? No
10210///
10211/// \p MachineOutlinerThunk implies that the function is being created from
10212/// a sequence of instructions ending in a call. The outlined function is
10213/// called with a BL instruction, and the outlined function tail-calls the
10214/// original call destination.
10215///
10216/// That is,
10217///
10218/// I1 OUTLINED_FUNCTION:
10219/// I2 --> BL OUTLINED_FUNCTION I1
10220/// BL f I2
10221/// B f
10222/// * Call construction overhead: 1 (BL)
10223/// * Frame construction overhead: 0
10224/// * Requires stack fixups? No
10225///
10226/// \p MachineOutlinerRegSave implies that the function should be called with a
10227/// save and restore of LR to an available register. This allows us to avoid
10228/// stack fixups. Note that this outlining variant is compatible with the
10229/// NoLRSave case.
10230///
10231/// That is,
10232///
10233/// I1 Save LR OUTLINED_FUNCTION:
10234/// I2 --> BL OUTLINED_FUNCTION I1
10235/// I3 Restore LR I2
10236/// I3
10237/// RET
10238///
10239/// * Call construction overhead: 3 (save + BL + restore)
10240/// * Frame construction overhead: 1 (ret)
10241/// * Requires stack fixups? No
10243 MachineOutlinerDefault, ///< Emit a save, restore, call, and return.
10244 MachineOutlinerTailCall, ///< Only emit a branch.
10245 MachineOutlinerNoLRSave, ///< Emit a call and return.
10246 MachineOutlinerThunk, ///< Emit a call and tail-call.
10247 MachineOutlinerRegSave ///< Same as default, but save to a register.
10248};
10249
10255
10257AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
10258 MachineFunction *MF = C.getMF();
10259 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
10260 const AArch64RegisterInfo *ARI =
10261 static_cast<const AArch64RegisterInfo *>(&TRI);
10262 // Check if there is an available register across the sequence that we can
10263 // use.
10264 for (unsigned Reg : AArch64::GPR64RegClass) {
10265 if (!ARI->isReservedReg(*MF, Reg) &&
10266 Reg != AArch64::LR && // LR is not reserved, but don't use it.
10267 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
10268 Reg != AArch64::X17 && // Ditto for X17.
10269 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
10270 C.isAvailableInsideSeq(Reg, TRI))
10271 return Reg;
10272 }
10273 return Register();
10274}
10275
10276static bool
10278 const outliner::Candidate &b) {
10279 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
10280 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
10281
10282 return MFIa->getSignReturnAddressCondition() ==
10284}
10285
10286static bool
10288 const outliner::Candidate &b) {
10289 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
10290 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
10291
10292 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
10293}
10294
10296 const outliner::Candidate &b) {
10297 const AArch64Subtarget &SubtargetA =
10299 const AArch64Subtarget &SubtargetB =
10300 b.getMF()->getSubtarget<AArch64Subtarget>();
10301 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
10302}
10303
/// Evaluates one set of repeated instruction sequences for outlining.
///
/// \p RepeatedSequenceLocs holds the candidate occurrences; candidates that
/// cannot be outlined safely are removed from it in place. \p MinRepeats is
/// the smallest number of surviving candidates for which outlining is still
/// attempted.
///
/// \returns std::nullopt when the sequence is rejected; otherwise an
/// outliner::OutlinedFunction built from the surviving candidates, the
/// sequence size in bytes, the number of bytes needed to create the frame and
/// the selected frame kind.
10304std::optional<std::unique_ptr<outliner::OutlinedFunction>>
10305AArch64InstrInfo::getOutliningCandidateInfo(
10306 const MachineModuleInfo &MMI,
10307 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
10308 unsigned MinRepeats) const {
10309 unsigned SequenceSize = 0;
10310 for (auto &MI : RepeatedSequenceLocs[0])
10311 SequenceSize += getInstSizeInBytes(MI);
10312
10313 unsigned NumBytesToCreateFrame = 0;
10314
10315 // Avoid splitting ADRP ADD/LDR pair into outlined functions.
10316 // These instructions are fused together by the scheduler.
10317 // Any candidate where ADRP is the last instruction should be rejected
10318 // as that will lead to splitting ADRP pair.
10319 MachineInstr &LastMI = RepeatedSequenceLocs[0].back();
10320 MachineInstr &FirstMI = RepeatedSequenceLocs[0].front();
10321 if (LastMI.getOpcode() == AArch64::ADRP &&
10322 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 &&
10323 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) {
10324 return std::nullopt;
10325 }
10326
10327 // Similarly any candidate where the first instruction is ADD/LDR with a
10328 // page offset should be rejected to avoid ADRP splitting.
10329 if ((FirstMI.getOpcode() == AArch64::ADDXri ||
10330 FirstMI.getOpcode() == AArch64::LDRXui) &&
10331 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 &&
10332 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) {
10333 return std::nullopt;
10334 }
10335
10336 // We only allow outlining for functions having exactly matching return
10337 // address signing attributes, i.e., all share the same value for the
10338 // attribute "sign-return-address" and all share the same type of key they
10339 // are signed with.
10340 // Additionally we require all functions to simultaneously either support
10341 // v8.3a features or not. Otherwise an outlined function could get signed
10342 // using dedicated v8.3 instructions and a call from a function that doesn't
10343 // support v8.3 instructions would therefore be invalid.
10344 if (std::adjacent_find(
10345 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
10346 [](const outliner::Candidate &a, const outliner::Candidate &b) {
10347 // Return true if a and b are non-equal w.r.t. return address
10348 // signing or support of v8.3a features
10349 if (outliningCandidatesSigningScopeConsensus(a, b) &&
10350 outliningCandidatesSigningKeyConsensus(a, b) &&
10351 outliningCandidatesV8_3OpsConsensus(a, b)) {
10352 return false;
10353 }
10354 return true;
10355 }) != RepeatedSequenceLocs.end()) {
10356 return std::nullopt;
10357 }
10358
10359 // Since at this point all candidates agree on their return address signing
10360 // picking just one is fine. If the candidate functions potentially sign their
10361 // return addresses, the outlined function should do the same. Note that in
10362 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
10363 // not certainly true that the outlined function will have to sign its return
10364 // address but this decision is made later, when the decision to outline
10365 // has already been made.
10366 // The same holds for the number of additional instructions we need: On
10367 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
10368 // necessary. However, at this point we don't know if the outlined function
10369 // will have a RET instruction so we assume the worst.
10370 const TargetRegisterInfo &TRI = getRegisterInfo();
10371 // Performing a tail call may require extra checks when PAuth is enabled.
10372 // If PAuth is disabled, set it to zero for uniformity.
10373 unsigned NumBytesToCheckLRInTCEpilogue = 0;
10374 const auto RASignCondition = RepeatedSequenceLocs[0]
10375 .getMF()
10376 ->getInfo<AArch64FunctionInfo>()
10377 ->getSignReturnAddressCondition();
10378 if (RASignCondition != SignReturnAddress::None) {
10379 // One PAC and one AUT instructions
10380 NumBytesToCreateFrame += 8;
10381
10382 // PAuth is enabled - set extra tail call cost, if any.
10383 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
10384 *RepeatedSequenceLocs[0].getMF());
10385 NumBytesToCheckLRInTCEpilogue =
// NOTE(review): the right-hand side of the assignment above appears to be
// missing from this listing (the embedded numbering skips a line); presumably
// it derives a byte count from LRCheckMethod - confirm against the original
// file.
10387 // Checking the authenticated LR value may significantly impact
10388 // SequenceSize, so account for it for more precise results.
10389 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
10390 SequenceSize += NumBytesToCheckLRInTCEpilogue;
10391
10392 // We have to check if sp modifying instructions would get outlined.
10393 // If so we only allow outlining if sp is unchanged overall, so matching
10394 // sub and add instructions are okay to outline, all other sp modifications
10395 // are not
10396 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
10397 int SPValue = 0;
10398 for (auto &MI : C) {
10399 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
10400 switch (MI.getOpcode()) {
10401 case AArch64::ADDXri:
10402 case AArch64::ADDWri:
10403 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
10404 assert(MI.getOperand(2).isImm() &&
10405 "Expected operand to be immediate");
10406 assert(MI.getOperand(1).isReg() &&
10407 "Expected operand to be a register");
10408 // Check if the add just increments sp. If so, we search for
10409 // matching sub instructions that decrement sp. If not, the
10410 // modification is illegal
10411 if (MI.getOperand(1).getReg() == AArch64::SP)
10412 SPValue += MI.getOperand(2).getImm();
10413 else
10414 return true;
10415 break;
10416 case AArch64::SUBXri:
10417 case AArch64::SUBWri:
10418 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
10419 assert(MI.getOperand(2).isImm() &&
10420 "Expected operand to be immediate");
10421 assert(MI.getOperand(1).isReg() &&
10422 "Expected operand to be a register");
10423 // Check if the sub just decrements sp. If so, we search for
10424 // matching add instructions that increment sp. If not, the
10425 // modification is illegal
10426 if (MI.getOperand(1).getReg() == AArch64::SP)
10427 SPValue -= MI.getOperand(2).getImm();
10428 else
10429 return true;
10430 break;
10431 default:
10432 return true;
10433 }
10434 }
10435 }
// A non-zero balance means the adds and subs on sp did not cancel out.
10436 if (SPValue)
10437 return true;
10438 return false;
10439 };
10440 // Remove candidates with illegal stack modifying instructions
10441 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
10442
10443 // If the sequence doesn't have enough candidates left, then we're done.
10444 if (RepeatedSequenceLocs.size() < MinRepeats)
10445 return std::nullopt;
10446 }
10447
10448 // Properties about candidate MBBs that hold for all of them.
10449 unsigned FlagsSetInAll = 0xF;
10450
10451 // Intersect the flags of all candidates: a bit stays set in FlagsSetInAll
// only if every candidate has it set.
10452 for (outliner::Candidate &C : RepeatedSequenceLocs)
10453 FlagsSetInAll &= C.Flags;
10454
10455 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
10456
10457 // Helper lambda which sets call information for every candidate.
10458 auto SetCandidateCallInfo =
10459 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
10460 for (outliner::Candidate &C : RepeatedSequenceLocs)
10461 C.setCallInfo(CallID, NumBytesForCall);
10462 };
10463
// Start from the default frame kind; the branches below may override it.
10464 unsigned FrameID = MachineOutlinerDefault;
10465 NumBytesToCreateFrame += 4;
10466
10467 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
10468 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
10469 });
10470
10471 // We check to see if CFI Instructions are present, and if they are
10472 // we find the number of CFI Instructions in the candidates.
10473 unsigned CFICount = 0;
10474 for (auto &I : RepeatedSequenceLocs[0]) {
10475 if (I.isCFIInstruction())
10476 CFICount++;
10477 }
10478
10479 // We compare the number of found CFI Instructions to the number of CFI
10480 // instructions in the parent function for each candidate. We must check this
10481 // since if we outline one of the CFI instructions in a function, we have to
10482 // outline them all for correctness. If we do not, the address offsets will be
10483 // incorrect between the two sections of the program.
10484 for (outliner::Candidate &C : RepeatedSequenceLocs) {
// NOTE(review): this declares the vector by value; if getFrameInstructions()
// returns a reference, binding a const reference instead would avoid a copy
// per candidate - confirm.
10485 std::vector<MCCFIInstruction> CFIInstructions =
10486 C.getMF()->getFrameInstructions();
10487
10488 if (CFICount > 0 && CFICount != CFIInstructions.size())
10489 return std::nullopt;
10490 }
10491
10492 // Returns true if an instruction is safe to fix up, false otherwise.
10493 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
10494 if (MI.isCall())
10495 return true;
10496
10497 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
10498 !MI.readsRegister(AArch64::SP, &TRI))
10499 return true;
10500
10501 // Any modification of SP will break our code to save/restore LR.
10502 // FIXME: We could handle some instructions which add a constant
10503 // offset to SP, with a bit more work.
10504 if (MI.modifiesRegister(AArch64::SP, &TRI))
10505 return false;
10506
10507 // At this point, we have a stack instruction that we might need to
10508 // fix up. We'll handle it if it's a load or store.
10509 if (MI.mayLoadOrStore()) {
10510 const MachineOperand *Base; // Filled with the base operand of MI.
10511 int64_t Offset; // Filled with the offset of MI.
10512 bool OffsetIsScalable;
10513
10514 // Does it allow us to offset the base operand and is the base the
10515 // register SP?
10516 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
10517 !Base->isReg() || Base->getReg() != AArch64::SP)
10518 return false;
10519
10520 // Fix-up code below assumes bytes.
10521 if (OffsetIsScalable)
10522 return false;
10523
10524 // Find the minimum/maximum offset for this instruction and check
10525 // if fixing it up would be in range.
10526 int64_t MinOffset,
10527 MaxOffset; // Unscaled offsets for the instruction.
10528 // The scale to multiply the offsets by.
10529 TypeSize Scale(0U, false), DummyWidth(0U, false);
10530 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
10531
10532 Offset += 16; // Update the offset to what it would be if we outlined.
10533 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
10534 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
10535 return false;
10536
10537 // It's in range, so we can outline it.
10538 return true;
10539 }
10540
10541 // FIXME: Add handling for instructions like "add x0, sp, #8".
10542
10543 // We can't fix it up, so don't outline it.
10544 return false;
10545 };
10546
10547 // True if it's possible to fix up each stack instruction in this sequence.
10548 // Important for frames/call variants that modify the stack.
10549 bool AllStackInstrsSafe =
10550 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
10551
10552 // If the last instruction in any candidate is a terminator, then we should
10553 // tail call all of the candidates.
10554 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10555 FrameID = MachineOutlinerTailCall;
10556 NumBytesToCreateFrame = 0;
10557 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
10558 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
10559 }
10560
10561 else if (LastInstrOpcode == AArch64::BL ||
10562 ((LastInstrOpcode == AArch64::BLR ||
10563 LastInstrOpcode == AArch64::BLRNoIP) &&
10564 !HasBTI)) {
10565 // FIXME: Do we need to check if the code after this uses the value of LR?
10566 FrameID = MachineOutlinerThunk;
10567 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
10568 SetCandidateCallInfo(MachineOutlinerThunk, 4);
10569 }
10570
10571 else {
10572 // We need to decide how to emit calls + frames. We can always emit the same
10573 // frame if we don't need to save to the stack. If we have to save to the
10574 // stack, then we need a different frame.
10575 unsigned NumBytesNoStackCalls = 0;
10576 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
10577
10578 // Check if we have to save LR.
10579 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10580 bool LRAvailable =
// NOTE(review): the condition of this conditional expression appears to be
// missing from this listing (the embedded numbering skips a line) - confirm
// against the original file.
10582 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
10583 : true;
10584 // If we have a noreturn caller, then we're going to be conservative and
10585 // say that we have to save LR. If we don't have a ret at the end of the
10586 // block, then we can't reason about liveness accurately.
10587 //
10588 // FIXME: We can probably do better than always disabling this in
10589 // noreturn functions by fixing up the liveness info.
10590 bool IsNoReturn =
10591 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
10592
10593 // Is LR available? If so, we don't need a save.
10594 if (LRAvailable && !IsNoReturn) {
10595 NumBytesNoStackCalls += 4;
10596 C.setCallInfo(MachineOutlinerNoLRSave, 4);
10597 CandidatesWithoutStackFixups.push_back(C);
10598 }
10599
10600 // Is an unused register available? If so, we won't modify the stack, so
10601 // we can outline with the same frame type as those that don't save LR.
10602 else if (findRegisterToSaveLRTo(C)) {
10603 NumBytesNoStackCalls += 12;
10604 C.setCallInfo(MachineOutlinerRegSave, 12);
10605 CandidatesWithoutStackFixups.push_back(C);
10606 }
10607
10608 // Is SP used in the sequence at all? If not, we don't have to modify
10609 // the stack, so we are guaranteed to get the same frame.
10610 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
10611 NumBytesNoStackCalls += 12;
10612 C.setCallInfo(MachineOutlinerDefault, 12);
10613 CandidatesWithoutStackFixups.push_back(C);
10614 }
10615
10616 // If we outline this, we need to modify the stack. Pretend we don't
10617 // outline this by saving all of its bytes.
10618 else {
10619 NumBytesNoStackCalls += SequenceSize;
10620 }
10621 }
10622
10623 // If there are no places where we have to save LR, then note that we
10624 // don't have to update the stack. Otherwise, give every candidate the
10625 // default call type, as long as it's safe to do so.
10626 if (!AllStackInstrsSafe ||
10627 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
10628 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
10629 FrameID = MachineOutlinerNoLRSave;
10630 if (RepeatedSequenceLocs.size() < MinRepeats)
10631 return std::nullopt;
10632 } else {
10633 SetCandidateCallInfo(MachineOutlinerDefault, 12);
10634
10635 // Bugzilla ID: 46767
10636 // TODO: Check if fixing up the stack more than once is safe so we can
10637 // outline these.
10638 //
10639 // An outline resulting in a caller that requires stack fixups at the
10640 // callsite to a callee that also requires stack fixups can happen when
10641 // there are no available registers at the candidate callsite for a
10642 // candidate that itself also has calls.
10643 //
10644 // In other words if function_containing_sequence in the following pseudo
10645 // assembly requires that we save LR at the point of the call, but there
10646 // are no available registers: in this case we save using SP and as a
10647 // result the SP offsets requires stack fixups by multiples of 16.
10648 //
10649 // function_containing_sequence:
10650 // ...
10651 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10652 // call OUTLINED_FUNCTION_N
10653 // restore LR from SP
10654 // ...
10655 //
10656 // OUTLINED_FUNCTION_N:
10657 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10658 // ...
10659 // bl foo
10660 // restore LR from SP
10661 // ret
10662 //
10663 // Because the code to handle more than one stack fixup does not
10664 // currently have the proper checks for legality, these cases will assert
10665 // in the AArch64 MachineOutliner. This is because the code to do this
10666 // needs more hardening, testing, better checks that generated code is
10667 // legal, etc and because it is only verified to handle a single pass of
10668 // stack fixup.
10669 //
10670 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
10671 // these cases until they are known to be handled. Bugzilla 46767 is
10672 // referenced in comments at the assert site.
10673 //
10674 // To avoid asserting (or generating non-legal code on noassert builds)
10675 // we remove all candidates which would need more than one stack fixup by
10676 // pruning the cases where the candidate has calls while also having no
10677 // available LR and having no available general purpose registers to copy
10678 // LR to (ie one extra stack save/restore).
10679 //
10680 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10681 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
10682 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
10683 return (llvm::any_of(C, IsCall)) &&
10684 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
10685 !findRegisterToSaveLRTo(C));
10686 });
10687 }
10688 }
10689
10690 // If we dropped all of the candidates, bail out here.
10691 if (RepeatedSequenceLocs.size() < MinRepeats)
10692 return std::nullopt;
10693 }
10694
10695 // Does every candidate's MBB contain a call? If so, then we might have a call
10696 // in the range.
10697 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10698 // Check if the range contains a call. These require a save + restore of the
10699 // link register.
10700 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
10701 bool ModStackToSaveLR = false;
10702 if (any_of(drop_end(FirstCand),
10703 [](const MachineInstr &MI) { return MI.isCall(); }))
10704 ModStackToSaveLR = true;
10705
10706 // Handle the last instruction separately. If this is a tail call, then the
10707 // last instruction is a call. We don't want to save + restore in this case.
10708 // However, it could be possible that the last instruction is a call without
10709 // it being valid to tail call this sequence. We should consider this as
10710 // well.
10711 else if (FrameID != MachineOutlinerThunk &&
10712 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
10713 ModStackToSaveLR = true;
10714
10715 if (ModStackToSaveLR) {
10716 // We can't fix up the stack. Bail out.
10717 if (!AllStackInstrsSafe)
10718 return std::nullopt;
10719
10720 // Save + restore LR.
10721 NumBytesToCreateFrame += 8;
10722 }
10723 }
10724
10725 // If we have CFI instructions, we can only outline if the outlined section
10726 // can be a tail call
10727 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
10728 return std::nullopt;
10729
10730 return std::make_unique<outliner::OutlinedFunction>(
10731 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
10732}
10733
10734void AArch64InstrInfo::mergeOutliningCandidateAttributes(
10735 Function &F, std::vector<outliner::Candidate> &Candidates) const {
10736 // If a bunch of candidates reach this point they must agree on their return
10737 // address signing. It is therefore enough to just consider the signing
10738 // behaviour of one of them
10739 const auto &CFn = Candidates.front().getMF()->getFunction();
10740
10741 if (CFn.hasFnAttribute("ptrauth-returns"))
10742 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
10743 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
10744 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
10745 // Since all candidates belong to the same module, just copy the
10746 // function-level attributes of an arbitrary function.
10747 if (CFn.hasFnAttribute("sign-return-address"))
10748 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
10749 if (CFn.hasFnAttribute("sign-return-address-key"))
10750 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
10751
10752 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
10753}
10754
/// Returns whether the outliner may take candidates from \p MF.
///
/// Returns false for linkonce_odr functions (unless \p OutlineFromLinkOnceODRs
/// is set), for functions with an explicit section, for functions that have no
/// AArch64FunctionInfo or whose red-zone status is true or unknown, and for
/// functions with streaming-mode changes.
10755bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10756 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10757 const Function &F = MF.getFunction();
10758
10759 // Can F be deduplicated by the linker? If it can, don't outline from it.
10760 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10761 return false;
10762
10763 // Don't outline from functions with section markings; the program could
10764 // expect that all the code is in the named section.
10765 // FIXME: Allow outlining from multiple functions with the same section
10766 // marking.
10767 if (F.hasSection())
10768 return false;
10769
10770 // Outlining from functions with redzones is unsafe since the outliner may
10771 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10772 // outline from it.
10773 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10774 if (!AFI || AFI->hasRedZone().value_or(true))
10775 return false;
10776
10777 // FIXME: Determine whether it is safe to outline from functions which contain
10778 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10779 // outlined together and ensure it is safe to outline with async unwind info,
10780 // required for saving & restoring VG around calls.
10781 if (AFI->hasStreamingModeChanges())
10782 return false;
10783
10784 // FIXME: Teach the outliner to generate/handle Windows unwind info.
// NOTE(review): the condition guarding the return below appears to be missing
// from this listing (the embedded numbering skips a line); as shown, the
// return is unconditional - confirm against the original file.
10786 return false;
10787
10788 // It's safe to outline from MF.
10789 return true;
10790}
10791
10793AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10794 unsigned &Flags) const {
10796 "Must track liveness!");
10798 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10799 Ranges;
10800 // According to the AArch64 Procedure Call Standard, the following are
10801 // undefined on entry/exit from a function call:
10802 //
10803 // * Registers x16, x17, (and thus w16, w17)
10804 // * Condition codes (and thus the NZCV register)
10805 //
10806 // If any of these registers are used inside or live across an outlined
10807 // function, then they may be modified later, either by the compiler or
10808 // some other tool (like the linker).
10809 //
10810 // To avoid outlining in these situations, partition each block into ranges
10811 // where these registers are dead. We will only outline from those ranges.
10812 LiveRegUnits LRU(getRegisterInfo());
10813 auto AreAllUnsafeRegsDead = [&LRU]() {
10814 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10815 LRU.available(AArch64::NZCV);
10816 };
10817
10818 // We need to know if LR is live across an outlining boundary later on in
10819 // order to decide how we'll create the outlined call, frame, etc.
10820 //
10821 // It's pretty expensive to check this for *every candidate* within a block.
10822 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10823 // to compute liveness from the end of the block for O(n) candidates within
10824 // the block.
10825 //
10826 // So, to improve the average case, let's keep track of liveness from the end
10827 // of the block to the beginning of *every outlinable range*. If we know that
10828 // LR is available in every range we could outline from, then we know that
10829 // we don't need to check liveness for any candidate within that range.
10830 bool LRAvailableEverywhere = true;
10831 // Compute liveness bottom-up.
10832 LRU.addLiveOuts(MBB);
10833 // Update flags that require info about the entire MBB.
10834 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10835 if (MI.isCall() && !MI.isTerminator())
10837 };
10838 // Range: [RangeBegin, RangeEnd)
10839 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10840 unsigned RangeLen;
10841 auto CreateNewRangeStartingAt =
10842 [&RangeBegin, &RangeEnd,
10843 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10844 RangeBegin = NewBegin;
10845 RangeEnd = std::next(RangeBegin);
10846 RangeLen = 0;
10847 };
10848 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10849 // At least one unsafe register is not dead. We do not want to outline at
10850 // this point. If it is long enough to outline from and does not cross a
10851 // bundle boundary, save the range [RangeBegin, RangeEnd).
10852 if (RangeLen <= 1)
10853 return;
10854 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10855 return;
10856 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10857 return;
10858 Ranges.emplace_back(RangeBegin, RangeEnd);
10859 };
10860 // Find the first point where all unsafe registers are dead.
10861 // FIND: <safe instr> <-- end of first potential range
10862 // SKIP: <unsafe def>
10863 // SKIP: ... everything between ...
10864 // SKIP: <unsafe use>
10865 auto FirstPossibleEndPt = MBB.instr_rbegin();
10866 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10867 if (!FirstPossibleEndPt->isDebugInstr())
10868 LRU.stepBackward(*FirstPossibleEndPt);
10869 // Update flags that impact how we outline across the entire block,
10870 // regardless of safety.
10871 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10872 if (AreAllUnsafeRegsDead())
10873 break;
10874 }
10875 // If we exhausted the entire block, we have no safe ranges to outline.
10876 if (FirstPossibleEndPt == MBB.instr_rend())
10877 return Ranges;
10878 // Current range.
10879 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10880 // StartPt points to the first place where all unsafe registers
10881 // are dead (if there is any such point). Begin partitioning the MBB into
10882 // ranges.
10883 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10884 if (!MI.isDebugInstr())
10885 LRU.stepBackward(MI);
10886 UpdateWholeMBBFlags(MI);
10887 if (!AreAllUnsafeRegsDead()) {
10888 SaveRangeIfNonEmpty();
10889 CreateNewRangeStartingAt(MI.getIterator());
10890 continue;
10891 }
10892 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10893 RangeBegin = MI.getIterator();
10894 ++RangeLen;
10895 }
10896 // Above loop misses the last (or only) range. If we are still safe, then
10897 // let's save the range.
10898 if (AreAllUnsafeRegsDead())
10899 SaveRangeIfNonEmpty();
10900 if (Ranges.empty())
10901 return Ranges;
10902 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
10903 // the order.
10904 std::reverse(Ranges.begin(), Ranges.end());
10905 // If there is at least one outlinable range where LR is unavailable
10906 // somewhere, remember that.
10907 if (!LRAvailableEverywhere)
10909 return Ranges;
10910}
10911
// Decides how the machine outliner may treat the instruction that MIT refers
// to. The returns visible in the call-handling section yield
// outliner::InstrType values (Illegal or LegalTerminator).
// NOTE(review): this listing has gaps -- the return-type line, one parameter
// line (presumably the one declaring MIT), and the statement that follows
// several `if`/`case` headers are not visible (see the jumps in the embedded
// line numbers). Restore them from the upstream file before relying on this
// text.
10913AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10915 unsigned Flags) const {
10916 MachineInstr &MI = *MIT;
10917
10918 // Don't outline anything used for return address signing. The outlined
10919 // function will get signed later if needed
10920 switch (MI.getOpcode()) {
10921 case AArch64::PACM:
10922 case AArch64::PACIASP:
10923 case AArch64::PACIBSP:
10924 case AArch64::PACIASPPC:
10925 case AArch64::PACIBSPPC:
10926 case AArch64::AUTIASP:
10927 case AArch64::AUTIBSP:
10928 case AArch64::AUTIASPPCi:
10929 case AArch64::AUTIASPPCr:
10930 case AArch64::AUTIBSPPCi:
10931 case AArch64::AUTIBSPPCr:
10932 case AArch64::RETAA:
10933 case AArch64::RETAB:
10934 case AArch64::RETAASPPCi:
10935 case AArch64::RETAASPPCr:
10936 case AArch64::RETABSPPCi:
10937 case AArch64::RETABSPPCr:
10938 case AArch64::EMITBKEY:
10939 case AArch64::PAUTH_PROLOGUE:
10940 case AArch64::PAUTH_EPILOGUE:
// NOTE(review): the statement shared by this case group is not visible here.
10942 }
10943
10944 // We can only outline these if we will tail call the outlined function, or
10945 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10946 // in a tail call.
10947 //
10948 // FIXME: If the proper fixups for the offset are implemented, this should be
10949 // possible.
10950 if (MI.isCFIInstruction())
// NOTE(review): the statement guarded by the `if` above is not visible here.
10952
10953 // Is this a terminator for a basic block?
10954 if (MI.isTerminator())
10955 // TargetInstrInfo::getOutliningType has already filtered out anything
10956 // that would break this, so we can allow it here.
// NOTE(review): the statement guarded by the `if` above is not visible here.
10958
10959 // Make sure none of the operands are un-outlinable.
10960 for (const MachineOperand &MOP : MI.operands()) {
10961 // A check preventing CFI indices was here before, but only CFI
10962 // instructions should have those.
10963 assert(!MOP.isCFIIndex());
10964
10965 // If it uses LR or W30 explicitly, then don't touch it.
10966 if (MOP.isReg() && !MOP.isImplicit() &&
10967 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
// NOTE(review): the statement guarded by the `if` above is not visible here.
10969 }
10970
10971 // Special cases for instructions that can always be outlined, but will fail
10972 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
10973 // be outlined because they don't require a *specific* value to be in LR.
10974 if (MI.getOpcode() == AArch64::ADRP)
// NOTE(review): the statement guarded by the `if` above is not visible here.
10976
10977 // If MI is a call we might be able to outline it. We don't want to outline
10978 // any calls that rely on the position of items on the stack. When we outline
10979 // something containing a call, we have to emit a save and restore of LR in
10980 // the outlined function. Currently, this always happens by saving LR to the
10981 // stack. Thus, if we outline, say, half the parameters for a function call
10982 // plus the call, then we'll break the callee's expectations for the layout
10983 // of the stack.
10984 //
10985 // FIXME: Allow calls to functions which construct a stack frame, as long
10986 // as they don't access arguments on the stack.
10987 // FIXME: Figure out some way to analyze functions defined in other modules.
10988 // We should be able to compute the memory usage based on the IR calling
10989 // convention, even if we can't see the definition.
10990 if (MI.isCall()) {
10991 // Get the function associated with the call. Look at each operand and find
10992 // the first global operand; Callee stays null if that global is not a
10993 // Function or if no global operand exists.
10993 const Function *Callee = nullptr;
10994 for (const MachineOperand &MOP : MI.operands()) {
10995 if (MOP.isGlobal()) {
10996 Callee = dyn_cast<Function>(MOP.getGlobal());
10997 break;
10998 }
10999 }
11000
11001 // Never outline calls to mcount. There isn't any rule that would require
11002 // this, but the Linux kernel's "ftrace" feature depends on it.
11003 if (Callee && Callee->getName() == "\01_mcount")
// NOTE(review): the statement guarded by the `if` above is not visible here.
11005
11006 // If we don't know anything about the callee, assume it depends on the
11007 // stack layout of the caller. In that case, it's only legal to outline
11008 // as a tail-call. Explicitly list the call instructions we know about so we
11009 // don't get unexpected results with call pseudo-instructions.
11010 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
11011 if (MI.getOpcode() == AArch64::BLR ||
11012 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
11013 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
11014
11015 if (!Callee)
11016 return UnknownCallOutlineType;
11017
11018 // We have a function we have information about. Check whether it is
11019 // something we can safely outline.
11020 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
11021
11022 // We don't know what's going on with the callee at all. Don't touch it.
11023 if (!CalleeMF)
11024 return UnknownCallOutlineType;
11025
11026 // Check if we know anything about the callee saves on the function. If we
11027 // don't, then don't touch it, since that implies that we haven't
11028 // computed anything about its stack frame yet. A callee with a non-zero
// stack size or any frame objects is treated the same way.
11029 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
11030 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
11031 MFI.getNumObjects() > 0)
11032 return UnknownCallOutlineType;
11033
11034 // At this point, we can say that CalleeMF ought to not pass anything on the
11035 // stack. Therefore, we can outline it.
// NOTE(review): the return for this path is not visible in this listing.
11037 }
11038
11039 // Don't touch the link register or W30.
11040 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
11041 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
// NOTE(review): the statement guarded by the `if` above is not visible here.
11043
11044 // Don't outline BTI instructions, because that will prevent the outlining
11045 // site from being indirectly callable.
11046 if (hasBTISemantics(MI))
// NOTE(review): the guarded statement and the function's final return are
// not visible in this listing.
11048
11050}
11051
11052void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
11053 for (MachineInstr &MI : MBB) {
11054 const MachineOperand *Base;
11055 TypeSize Width(0, false);
11056 int64_t Offset;
11057 bool OffsetIsScalable;
11058
11059 // Is this a load or store with an immediate offset with SP as the base?
11060 if (!MI.mayLoadOrStore() ||
11061 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
11062 &RI) ||
11063 (Base->isReg() && Base->getReg() != AArch64::SP))
11064 continue;
11065
11066 // It is, so we have to fix it up.
11067 TypeSize Scale(0U, false);
11068 int64_t Dummy1, Dummy2;
11069
11070 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
11071 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
11072 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
11073 assert(Scale != 0 && "Unexpected opcode!");
11074 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
11075
11076 // We've pushed the return address to the stack, so add 16 to the offset.
11077 // This is safe, since we already checked if it would overflow when we
11078 // checked if this instruction was legal to outline.
11079 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
11080 StackOffsetOperand.setImm(NewImm);
11081 }
11082}
11083
// Helper that adds return-address signing to an outlined function (called as
// signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr) elsewhere in this
// file). It does nothing unless ShouldSignReturnAddr is set; otherwise it
// builds a PAUTH_PROLOGUE at the start of MBB and asks TII to create the
// matching epilogue instruction.
// NOTE(review): the first signature line and the continuation of the BuildMI
// chain are not visible in this listing.
11085 const AArch64InstrInfo *TII,
11086 bool ShouldSignReturnAddr) {
11087 if (!ShouldSignReturnAddr)
11088 return;
11089
11090 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
11092 TII->createPauthEpilogueInstr(MBB, DebugLoc());
11093}
11094
// Finalizes the body of an outlined function according to
// OF.FrameConstructionID: converts a trailing call into a tail call for
// thunks, saves/restores LR around the body when it contains a non-tail call,
// inserts a RET when the function is not tail-called, applies return-address
// signing, and records the outlining style on the function info.
// NOTE(review): one parameter line (presumably declaring MBB and MF), one
// statement in the thunk branch, and the declarations of `It`/`Et` are not
// visible in this listing.
11095void AArch64InstrInfo::buildOutlinedFrame(
11097 const outliner::OutlinedFunction &OF) const {
11098
11099 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
11100
11101 if (OF.FrameConstructionID == MachineOutlinerTailCall)
11102 FI->setOutliningStyle("Tail Call");
11103 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
11104 // For thunk outlining, rewrite the last instruction from a call to a
11105 // tail-call.
11106 MachineInstr *Call = &*--MBB.instr_end();
11107 unsigned TailOpcode;
11108 if (Call->getOpcode() == AArch64::BL) {
11109 TailOpcode = AArch64::TCRETURNdi;
11110 } else {
11111 assert(Call->getOpcode() == AArch64::BLR ||
11112 Call->getOpcode() == AArch64::BLRNoIP);
11113 TailOpcode = AArch64::TCRETURNriALL;
11114 }
// The tail call reuses the original call's first operand as its target.
11115 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
11116 .add(Call->getOperand(0))
11117 .addImm(0);
11118 MBB.insert(MBB.end(), TC);
// NOTE(review): a statement is missing here in this listing (presumably the
// removal of the original call) -- confirm against upstream.
11120
11121 FI->setOutliningStyle("Thunk");
11122 }
11123
11124 bool IsLeafFunction = true;
11125
11126 // Is there a call in the outlined range?
11127 auto IsNonTailCall = [](const MachineInstr &MI) {
11128 return MI.isCall() && !MI.isReturn();
11129 };
11130
11131 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
11132 // Fix up the instructions in the range, since we're going to modify the
11133 // stack.
11134
11135 // Bugzilla ID: 46767
11136 // TODO: Check if fixing up twice is safe so we can outline these.
11137 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
11138 "Can only fix up stack references once");
11139 fixupPostOutline(MBB);
11140
11141 IsLeafFunction = false;
11142
11143 // LR has to be a live in so that we can save it.
11144 if (!MBB.isLiveIn(AArch64::LR))
11145 MBB.addLiveIn(AArch64::LR);
11146
// NOTE(review): `It` and `Et` are used below but their declarations are not
// visible in this listing.
11149
// For tail-call and thunk frames the restore must go before the last
// instruction of the block rather than at the very end.
11150 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
11151 OF.FrameConstructionID == MachineOutlinerThunk)
11152 Et = std::prev(MBB.end());
11153
11154 // Insert a save before the outlined region
11155 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
11156 .addReg(AArch64::SP, RegState::Define)
11157 .addReg(AArch64::LR)
11158 .addReg(AArch64::SP)
11159 .addImm(-16);
11160 It = MBB.insert(It, STRXpre);
11161
11162 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
11163 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
11164
11165 // Add a CFI saying the stack was moved 16 B down.
11166 CFIBuilder.buildDefCFAOffset(16);
11167
11168 // Add a CFI saying that the LR that we want to find is now 16 B higher
11169 // than before.
11170 CFIBuilder.buildOffset(AArch64::LR, -16);
11171 }
11172
11173 // Insert a restore before the terminator for the function.
11174 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
11175 .addReg(AArch64::SP, RegState::Define)
11176 .addReg(AArch64::LR, RegState::Define)
11177 .addReg(AArch64::SP)
11178 .addImm(16);
11179 Et = MBB.insert(Et, LDRXpost);
11180 }
11181
// The signing decision depends on the function's signing condition and on
// whether the outlined body is a non-leaf (contains a non-tail call).
11182 auto RASignCondition = FI->getSignReturnAddressCondition();
11183 bool ShouldSignReturnAddr = AArch64FunctionInfo::shouldSignReturnAddress(
11184 RASignCondition, !IsLeafFunction);
11185
11186 // If this is a tail call outlined function, then there's already a return.
11187 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
11188 OF.FrameConstructionID == MachineOutlinerThunk) {
11189 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
11190 return;
11191 }
11192
11193 // It's not a tail call, so we have to insert the return ourselves.
11194
11195 // LR has to be a live in so that we can return to it.
11196 if (!MBB.isLiveIn(AArch64::LR))
11197 MBB.addLiveIn(AArch64::LR);
11198
11199 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
11200 .addReg(AArch64::LR);
11201 MBB.insert(MBB.end(), ret);
11202
11203 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
11204
11205 FI->setOutliningStyle("Function");
11206
11207 // Did we have to modify the stack by saving the link register?
11208 if (OF.FrameConstructionID != MachineOutlinerDefault)
11209 return;
11210
11211 // We modified the stack.
11212 // Walk over the basic block and fix up all the stack accesses.
11213 fixupPostOutline(MBB);
11214}
11215
// Inserts the call to an outlined function at `It` in MBB and returns an
// iterator to the inserted call/branch. Depending on C.CallConstructionID the
// call is a tail-call branch, a plain BL, or a BL wrapped in a save/restore of
// LR (to a spare register or to the stack).
// NOTE(review): the parameter lines (declaring M, MBB, It, MF and C as used
// below) and the declaration of `CallPt` are not visible in this listing.
11216MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
11219
11220 // Are we tail calling?
11221 if (C.CallConstructionID == MachineOutlinerTailCall) {
11222 // If yes, then we can just branch to the label.
11223 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
11224 .addGlobalAddress(M.getNamedValue(MF.getName()))
11225 .addImm(0));
11226 return It;
11227 }
11228
11229 // Are we saving the link register?
11230 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
11231 C.CallConstructionID == MachineOutlinerThunk) {
11232 // No, so just insert the call.
11233 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
11234 .addGlobalAddress(M.getNamedValue(MF.getName())));
11235 return It;
11236 }
11237
11238 // We want to return the spot where we inserted the call.
// NOTE(review): the declaration of `CallPt` is not visible in this listing.
11240
11241 // Instructions for saving and restoring LR around the call instruction we're
11242 // going to insert.
11243 MachineInstr *Save;
11244 MachineInstr *Restore;
11245 // Can we save to a register?
11246 if (C.CallConstructionID == MachineOutlinerRegSave) {
11247 // FIXME: This logic should be sunk into a target-specific interface so that
11248 // we don't have to recompute the register.
11249 Register Reg = findRegisterToSaveLRTo(C);
11250 assert(Reg && "No callee-saved register available?");
11251
11252 // LR has to be a live in so that we can save it.
11253 if (!MBB.isLiveIn(AArch64::LR))
11254 MBB.addLiveIn(AArch64::LR);
11255
11256 // Save and restore LR from Reg (ORR with XZR and a zero shift amount acts
// as a register move).
11257 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
11258 .addReg(AArch64::XZR)
11259 .addReg(AArch64::LR)
11260 .addImm(0);
11261 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
11262 .addReg(AArch64::XZR)
11263 .addReg(Reg)
11264 .addImm(0);
11265 } else {
11266 // We have the default case. Save and restore from SP.
11267 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
11268 .addReg(AArch64::SP, RegState::Define)
11269 .addReg(AArch64::LR)
11270 .addReg(AArch64::SP)
11271 .addImm(-16);
11272 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
11273 .addReg(AArch64::SP, RegState::Define)
11274 .addReg(AArch64::LR, RegState::Define)
11275 .addReg(AArch64::SP)
11276 .addImm(16);
11277 }
11278
// Emit Save, then the call, then Restore, advancing `It` past each inserted
// instruction so the three end up in that order.
11279 It = MBB.insert(It, Save);
11280 It++;
11281
11282 // Insert the call.
11283 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
11284 .addGlobalAddress(M.getNamedValue(MF.getName())));
11285 CallPt = It;
11286 It++;
11287
11288 It = MBB.insert(It, Restore);
11289 return CallPt;
11290}
11291
11292bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
11293 MachineFunction &MF) const {
11294 return MF.getFunction().hasMinSize();
11295}
11296
// Emits an instruction before `Iter` in MBB that zeroes Reg. The instruction
// chosen depends on the register kind and on which vector features the
// subtarget reports: MOVZXi for general-purpose registers, DUP_ZI_D when SVE
// or streaming SVE is available, MOVIv2d_ns when NEON is available, and
// FMOVD0 on the 64-bit sub-register otherwise.
// NOTE(review): the parameter line declaring `Iter` is not visible in this
// listing; AllowSideEffects is not read in the visible body.
11297void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
11299 DebugLoc &DL,
11300 bool AllowSideEffects) const {
11301 const MachineFunction &MF = *MBB.getParent();
11302 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
11303 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
11304
11305 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
11306 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
11307 } else if (STI.isSVEorStreamingSVEAvailable()) {
11308 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
11309 .addImm(0)
11310 .addImm(0);
11311 } else if (STI.isNeonAvailable()) {
11312 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
11313 .addImm(0);
11314 } else {
11315 // This is a streaming-compatible function without SVE. We don't have full
11316 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
11317 // Since `movi v..` would be illegal here, use `fmov d..` instead.
11318 assert(STI.hasNEON() && "Expected to have NEON.");
11319 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
11320 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
11321 }
11322}
11323
// Recognizes ORR-based register moves and returns their destination/source
// operand pair (operands 0 and 2), or std::nullopt when MI is not one.
// NOTE(review): the line carrying the function name and parameters is not
// visible in this listing (presumably AArch64InstrInfo::isCopyInstrImpl taking
// `const MachineInstr &MI`) -- confirm against upstream.
11324std::optional<DestSourcePair>
11326
11327 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
11328 // and zero immediate operands used as an alias for mov instruction.
11329 if (((MI.getOpcode() == AArch64::ORRWrs &&
11330 MI.getOperand(1).getReg() == AArch64::WZR &&
11331 MI.getOperand(3).getImm() == 0x0) ||
11332 (MI.getOpcode() == AArch64::ORRWrr &&
11333 MI.getOperand(1).getReg() == AArch64::WZR)) &&
11334 // Check that the w->w move is not a zero-extending w->x mov: a virtual
// destination must have no sub-register index, and a physical destination
// must not come with a def of its X super-register on the same instruction.
11335 (!MI.getOperand(0).getReg().isVirtual() ||
11336 MI.getOperand(0).getSubReg() == 0) &&
11337 (!MI.getOperand(0).getReg().isPhysical() ||
11338 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
11339 /*TRI=*/nullptr) == -1))
11340 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11341
// 64-bit form: ORRXrs with XZR as first source and a zero shift immediate.
11342 if (MI.getOpcode() == AArch64::ORRXrs &&
11343 MI.getOperand(1).getReg() == AArch64::XZR &&
11344 MI.getOperand(3).getImm() == 0x0)
11345 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11346
11347 return std::nullopt;
11348}
11349
// Returns the destination/source operand pair (operands 0 and 2) when MI is an
// ORRWrs with WZR and a zero shift immediate, or an ORRWrr with WZR; otherwise
// std::nullopt. Unlike the stricter check above it, this one places no
// condition on the destination operand.
// NOTE(review): the line carrying the function name and parameters is not
// visible in this listing (presumably AArch64InstrInfo::isCopyLikeInstrImpl
// taking `const MachineInstr &MI`) -- confirm against upstream.
11350std::optional<DestSourcePair>
11352 if ((MI.getOpcode() == AArch64::ORRWrs &&
11353 MI.getOperand(1).getReg() == AArch64::WZR &&
11354 MI.getOperand(3).getImm() == 0x0) ||
11355 (MI.getOpcode() == AArch64::ORRWrr &&
11356 MI.getOperand(1).getReg() == AArch64::WZR))
11357 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11358 return std::nullopt;
11359}
11360
11361std::optional<RegImmPair>
11362AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
11363 int Sign = 1;
11364 int64_t Offset = 0;
11365
11366 // TODO: Handle cases where Reg is a super- or sub-register of the
11367 // destination register.
11368 const MachineOperand &Op0 = MI.getOperand(0);
11369 if (!Op0.isReg() || Reg != Op0.getReg())
11370 return std::nullopt;
11371
11372 switch (MI.getOpcode()) {
11373 default:
11374 return std::nullopt;
11375 case AArch64::SUBWri:
11376 case AArch64::SUBXri:
11377 case AArch64::SUBSWri:
11378 case AArch64::SUBSXri:
11379 Sign *= -1;
11380 [[fallthrough]];
11381 case AArch64::ADDSWri:
11382 case AArch64::ADDSXri:
11383 case AArch64::ADDWri:
11384 case AArch64::ADDXri: {
11385 // TODO: Third operand can be global address (usually some string).
11386 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
11387 !MI.getOperand(2).isImm())
11388 return std::nullopt;
11389 int Shift = MI.getOperand(3).getImm();
11390 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
11391 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
11392 }
11393 }
11394 return RegImmPair{MI.getOperand(1).getReg(), Offset};
11395}
11396
11397/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
11398/// the destination register then, if possible, describe the value in terms of
11399/// the source register.
// Returns std::nullopt when MI is not copy-like, when either register of the
// copy is invalid, or when DescribedReg does not match any handled case.
// NOTE(review): the line carrying the function name and its first parameters
// is not visible in this listing; the body uses `MI` and `DescribedReg`, and
// the visible call site is describeORRLoadedValue(MI, Reg, this, TRI).
11400static std::optional<ParamLoadedValue>
11402 const TargetInstrInfo *TII,
11403 const TargetRegisterInfo *TRI) {
11404 auto DestSrc = TII->isCopyLikeInstr(MI);
11405 if (!DestSrc)
11406 return std::nullopt;
11407
11408 Register DestReg = DestSrc->Destination->getReg();
11409 Register SrcReg = DestSrc->Source->getReg();
11410
11411 if (!DestReg.isValid() || !SrcReg.isValid())
11412 return std::nullopt;
11413
// An empty DIExpression: the value is described by the register alone.
11414 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
11415
11416 // If the described register is the destination, just return the source.
11417 if (DestReg == DescribedReg)
11418 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
11419
11420 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
11421 if (MI.getOpcode() == AArch64::ORRWrs &&
11422 TRI->isSuperRegister(DestReg, DescribedReg))
11423 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
11424
11425 // We may need to describe the lower part of a ORRXrs move; in that case the
// value is the 32-bit sub-register (sub_32) of the source.
11426 if (MI.getOpcode() == AArch64::ORRXrs &&
11427 TRI->isSubRegister(DestReg, DescribedReg)) {
11428 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
11429 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
11430 }
11431
11432 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
11433 "Unhandled ORR[XW]rs copy case");
11434
11435 return std::nullopt;
11436}
11437
// Reports whether MF may be split across sections. A function is rejected when
// it has a red zone, and also when the red-zone state is unknown (the optional
// is empty, so value_or(true) applies).
// NOTE(review): the final return statement is not visible in this listing.
11438bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
11439 // Functions cannot be split to different sections on AArch64 if they have
11440 // a red zone. This is because relaxing a cross-section branch may require
11441 // incrementing the stack pointer to spill a register, which would overwrite
11442 // the red zone.
11443 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
11444 return false;
11445
11447}
11448
11449bool AArch64InstrInfo::isMBBSafeToSplitToCold(
11450 const MachineBasicBlock &MBB) const {
11451 // Asm Goto blocks can contain conditional branches to goto labels, which can
11452 // get moved out of range of the branch instruction.
11453 auto isAsmGoto = [](const MachineInstr &MI) {
11454 return MI.getOpcode() == AArch64::INLINEASM_BR;
11455 };
11456 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
11457 return false;
11458
11459 // Because jump tables are label-relative instead of table-relative, they all
11460 // must be in the same section or relocation fixup handling will fail.
11461
11462 // Check if MBB is a jump table target
11463 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
11464 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
11465 return llvm::is_contained(JTE.MBBs, &MBB);
11466 };
11467 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
11468 return false;
11469
11470 // Check if MBB contains a jump table lookup
11471 for (const MachineInstr &MI : MBB) {
11472 switch (MI.getOpcode()) {
11473 case TargetOpcode::G_BRJT:
11474 case AArch64::JumpTableDest32:
11475 case AArch64::JumpTableDest16:
11476 case AArch64::JumpTableDest8:
11477 return false;
11478 default:
11479 continue;
11480 }
11481 }
11482
11483 // MBB isn't a special case, so it's safe to be split to the cold section.
11484 return true;
11485}
11486
// Describes the value MI loads into Reg for the opcodes handled here: MOVZ
// immediates are returned as an immediate operand (immediate shifted left by
// the shift operand) with a null expression, and ORRWrs/ORRXrs are delegated
// to describeORRLoadedValue.
// NOTE(review): the statement after the switch (the return for all other
// opcodes) is not visible in this listing.
11487std::optional<ParamLoadedValue>
11488AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
11489 Register Reg) const {
11490 const MachineFunction *MF = MI.getMF();
11491 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
11492 switch (MI.getOpcode()) {
11493 case AArch64::MOVZWi:
11494 case AArch64::MOVZXi: {
11495 // MOVZWi may be used for producing zero-extended 32-bit immediates in
11496 // 64-bit parameters, so we need to consider super-registers.
11497 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
11498 return std::nullopt;
11499
11500 if (!MI.getOperand(1).isImm())
11501 return std::nullopt;
11502 int64_t Immediate = MI.getOperand(1).getImm();
11503 int Shift = MI.getOperand(2).getImm();
11504 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
11505 nullptr);
11506 }
11507 case AArch64::ORRWrs:
11508 case AArch64::ORRXrs:
11509 return describeORRLoadedValue(MI, Reg, this, TRI);
11510 }
11511
11513}
11514
11515bool AArch64InstrInfo::isExtendLikelyToBeFolded(
11516 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
11517 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
11518 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
11519 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
11520
11521 // Anyexts are nops.
11522 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
11523 return true;
11524
11525 Register DefReg = ExtMI.getOperand(0).getReg();
11526 if (!MRI.hasOneNonDBGUse(DefReg))
11527 return false;
11528
11529 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
11530 // addressing mode.
11531 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
11532 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
11533}
11534
11535uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
11536 return get(Opc).TSFlags & AArch64::ElementSizeMask;
11537}
11538
11539bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
11540 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
11541}
11542
11543bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
11544 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
11545}
11546
11547unsigned int
11548AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
11549 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
11550}
11551
11552bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
11553 unsigned Scale) const {
11554 if (Offset && Scale)
11555 return false;
11556
11557 // Check Reg + Imm
11558 if (!Scale) {
11559 // 9-bit signed offset
11560 if (isInt<9>(Offset))
11561 return true;
11562
11563 // 12-bit unsigned offset
11564 unsigned Shift = Log2_64(NumBytes);
11565 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
11566 // Must be a multiple of NumBytes (NumBytes is a power of 2)
11567 (Offset >> Shift) << Shift == Offset)
11568 return true;
11569 return false;
11570 }
11571
11572 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
11573 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
11574}
11575
// Selects the indirect-call opcode for MF: BLRNoIP when the subtarget reports
// hardenSlsBlr(), plain BLR otherwise.
// NOTE(review): the signature line is not visible in this listing (presumably
// a function named getBLRCallOpcode taking the MachineFunction `MF`) --
// confirm against upstream.
11577 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
11578 return AArch64::BLRNoIP;
11579 else
11580 return AArch64::BLR;
11581}
11582
// Builds a PAUTH_EPILOGUE pseudo before the first terminator of MBB and marks
// the scratch registers it implicitly defines: X17 and X16 (plus X15 with
// PAuthLR) when the frame lowering reports argument stack to restore, or X16
// alone when PAuth-LR branch protection is requested but the subtarget lacks
// the PAuthLR feature.
// NOTE(review): the first signature line and the continuation of the BuildMI
// chain that initializes `Builder` are not visible in this listing.
11584 DebugLoc DL) const {
11585 MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
11586 auto Builder = BuildMI(MBB, InsertPt, DL, get(AArch64::PAUTH_EPILOGUE))
11588
11589 MachineFunction &MF = *MBB.getParent();
11590 const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
11591 auto &AFL = *static_cast<const AArch64FrameLowering *>(
11592 MF.getSubtarget().getFrameLowering());
11593 if (AFL.getArgumentStackToRestore(MF, MBB)) {
11594 Builder.addReg(AArch64::X17, RegState::ImplicitDefine);
11595 Builder.addReg(AArch64::X16, RegState::ImplicitDefine);
11596 if (Subtarget.hasPAuthLR())
11597 Builder.addReg(AArch64::X15, RegState::ImplicitDefine);
11598 return;
11599 }
11600
11601 if (AFI->branchProtectionPAuthLR() && !Subtarget.hasPAuthLR())
11602 Builder.addReg(AArch64::X16, RegState::ImplicitDefine);
11603}
11604
// Emits a stack-probing loop that lowers SP in ProbeSize steps until it
// reaches TargetReg, touching the stack after each step. Three new blocks
// (LoopTest, LoopBody, Exit) are created after MBBI's block, the instructions
// following MBBI are moved into Exit, and an iterator to the start of Exit is
// returned.
// NOTE(review): this listing has gaps -- the return-type line, the
// initializer of `Flags`, one operand line in several BuildMI chains, and a
// statement after the splice are not visible (see the jumps in the embedded
// line numbers).
11606AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
11607 Register TargetReg, bool FrameSetup) const {
11608 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
11609
11610 MachineBasicBlock &MBB = *MBBI->getParent();
11611 MachineFunction &MF = *MBB.getParent();
11612 const AArch64InstrInfo *TII =
11613 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
11614 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
11615 DebugLoc DL = MBB.findDebugLoc(MBBI);
11616
// Create the three new blocks and place them, in order, right after MBB.
11617 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
11618 MachineBasicBlock *LoopTestMBB =
11619 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11620 MF.insert(MBBInsertPoint, LoopTestMBB);
11621 MachineBasicBlock *LoopBodyMBB =
11622 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11623 MF.insert(MBBInsertPoint, LoopBodyMBB);
11624 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11625 MF.insert(MBBInsertPoint, ExitMBB);
// NOTE(review): the initializer of Flags is not visible in this listing
// (presumably derived from FrameSetup) -- confirm against upstream.
11626 MachineInstr::MIFlag Flags =
11628
11629 // LoopTest:
11630 // SUB SP, SP, #ProbeSize
11631 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
11632 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
11633
11634 // CMP SP, TargetReg
11635 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
11636 AArch64::XZR)
11637 .addReg(AArch64::SP)
11638 .addReg(TargetReg)
// NOTE(review): one operand line of this chain is not visible here.
11640 .setMIFlags(Flags);
11641
11642 // B.<Cond> LoopExit
11643 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
// NOTE(review): the condition-code operand line is not visible here.
11645 .addMBB(ExitMBB)
11646 .setMIFlags(Flags);
11647
11648 // LDR XZR, [SP]
11649 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::LDRXui))
11650 .addDef(AArch64::XZR)
11651 .addReg(AArch64::SP)
11652 .addImm(0)
// NOTE(review): the start of the operand ending in `Align(8)))` is not
// visible in this listing.
11656 Align(8)))
11657 .setMIFlags(Flags);
11658
11659 // B loop
11660 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
11661 .addMBB(LoopTestMBB)
11662 .setMIFlags(Flags);
11663
11664 // LoopExit:
11665 // MOV SP, TargetReg
11666 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
11667 .addReg(TargetReg)
11668 .addImm(0)
// NOTE(review): one operand line of this chain is not visible here.
11670 .setMIFlags(Flags);
11671
11672 // LDR XZR, [SP]
11673 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
11674 .addReg(AArch64::XZR, RegState::Define)
11675 .addReg(AArch64::SP)
11676 .addImm(0)
11677 .setMIFlags(Flags);
11678
// Move everything after MBBI out of the original block into the exit block.
11679 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
// NOTE(review): a statement is missing here in this listing.
11681
// Wire up the CFG: MBB -> LoopTest -> {Exit, LoopBody}, LoopBody -> LoopTest.
11682 LoopTestMBB->addSuccessor(ExitMBB);
11683 LoopTestMBB->addSuccessor(LoopBodyMBB);
11684 LoopBodyMBB->addSuccessor(LoopTestMBB);
11685 MBB.addSuccessor(LoopTestMBB);
11686
11687 // Update liveins.
11688 if (MF.getRegInfo().reservedRegsFrozen())
11689 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
11690
11691 return ExitMBB->begin();
11692}
11693
11694namespace {
11695class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
11696 MachineFunction *MF;
11697 const TargetInstrInfo *TII;
11698 const TargetRegisterInfo *TRI;
11699 MachineRegisterInfo &MRI;
11700
11701 /// The block of the loop
11702 MachineBasicBlock *LoopBB;
11703 /// The conditional branch of the loop
11704 MachineInstr *CondBranch;
11705 /// The compare instruction for loop control
11706 MachineInstr *Comp;
11707 /// The number of the operand of the loop counter value in Comp
11708 unsigned CompCounterOprNum;
11709 /// The instruction that updates the loop counter value
11710 MachineInstr *Update;
11711 /// The number of the operand of the loop counter value in Update
11712 unsigned UpdateCounterOprNum;
11713 /// The initial value of the loop counter
11714 Register Init;
11715 /// True iff Update is a predecessor of Comp
11716 bool IsUpdatePriorComp;
11717
11718 /// The normalized condition used by createTripCountGreaterCondition()
11720
11721public:
11722 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
11723 MachineInstr *Comp, unsigned CompCounterOprNum,
11724 MachineInstr *Update, unsigned UpdateCounterOprNum,
11725 Register Init, bool IsUpdatePriorComp,
11726 const SmallVectorImpl<MachineOperand> &Cond)
11727 : MF(Comp->getParent()->getParent()),
11728 TII(MF->getSubtarget().getInstrInfo()),
11729 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
11730 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
11731 CompCounterOprNum(CompCounterOprNum), Update(Update),
11732 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
11733 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
11734
11735 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
11736 // Make the instructions for loop control be placed in stage 0.
11737 // The predecessors of Comp are considered by the caller.
11738 return MI == Comp;
11739 }
11740
11741 std::optional<bool> createTripCountGreaterCondition(
11742 int TC, MachineBasicBlock &MBB,
11743 SmallVectorImpl<MachineOperand> &CondParam) override {
11744 // A branch instruction will be inserted as "if (Cond) goto epilogue".
11745 // Cond is normalized for such use.
11746 // The predecessors of the branch are assumed to have already been inserted.
11747 CondParam = Cond;
11748 return {};
11749 }
11750
11751 void createRemainingIterationsGreaterCondition(
11752 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11753 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
11754
11755 void setPreheader(MachineBasicBlock *NewPreheader) override {}
11756
11757 void adjustTripCount(int TripCountAdjust) override {}
11758
11759 bool isMVEExpanderSupported() override { return true; }
11760};
11761} // namespace
11762
11763/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
11764/// is replaced by ReplaceReg. The output register is newly created.
11765/// The other operands are unchanged from MI.
11766static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
11767 Register ReplaceReg, MachineBasicBlock &MBB,
11768 MachineBasicBlock::iterator InsertTo) {
11769 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
11770 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
11771 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
11772 Register Result = 0;
11773 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
11774 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
11775 Result = MRI.createVirtualRegister(
11776 MRI.getRegClass(NewMI->getOperand(0).getReg()));
11777 NewMI->getOperand(I).setReg(Result);
11778 } else if (I == ReplaceOprNum) {
11779 MRI.constrainRegClass(ReplaceReg, TII->getRegClass(NewMI->getDesc(), I));
11780 NewMI->getOperand(I).setReg(ReplaceReg);
11781 }
11782 }
11783 MBB.insert(InsertTo, NewMI);
11784 return Result;
11785}
11786
// Appends to MBB a sequence of cloned Update/Comp instructions interleaved with
// CSINCXr accumulators, then a SUBSXri comparing the accumulated value with 0,
// and finally clears Cond. The worked example below shows the emitted shape.
// NOTE(review): this listing has lost lines — the embedded numbering skips
// 11788-11789, 11805, 11808, 11815, 11821 and 11888. The parameter list, the
// declaration of CC, the statement guarded by the `== LoopBB` test, the end of
// the lambda's parameter list, the last CSINCXr operand and whatever follows
// Cond.clear() are therefore not visible here. Restore them from the original
// file; do not infer them from this listing.
11787void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11790 // Create and accumulate conditions for next TC iterations.
11791 // Example:
11792 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11793 // # iteration of the kernel
11794 //
11795 // # insert the following instructions
11796 // cond = CSINCXr 0, 0, C, implicit $nzcv
11797 // counter = ADDXri counter, 1 # clone from this->Update
11798 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11799 // cond = CSINCXr cond, cond, C, implicit $nzcv
11800 // ... (repeat TC times)
11801 // SUBSXri cond, 0, implicit-def $nzcv
11802
11803 assert(CondBranch->getOpcode() == AArch64::Bcc);
11804 // CondCode to exit the loop
11806 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11807 if (CondBranch->getOperand(1).getMBB() == LoopBB)
11809
11810 // Accumulate conditions to exit the loop
11811 Register AccCond = AArch64::XZR;
11812
11813 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11814 auto AccumulateCond = [&](Register CurCond,
11816 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11817 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11818 .addReg(NewCond, RegState::Define)
11819 .addReg(CurCond)
11820 .addReg(CurCond)
11822 return NewCond;
11823 };
11824
11825 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11826 // Update and Comp for I==0 already exist in MBB
11827 // (MBB is an unrolled kernel)
11828 Register Counter;
11829 for (int I = 0; I <= TC; ++I) {
11830 Register NextCounter;
11831 if (I != 0)
11832 NextCounter =
11833 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11834
11835 AccCond = AccumulateCond(AccCond, CC);
11836
11837 if (I != TC) {
11838 if (I == 0) {
11839 if (Update != Comp && IsUpdatePriorComp) {
11840 Counter =
11841 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11842 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11843 MBB.end());
11844 } else {
11845 // can use already calculated value
11846 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11847 }
11848 } else if (Update != Comp) {
11849 NextCounter =
11850 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11851 }
11852 }
11853 Counter = NextCounter;
11854 }
11855 } else {
11856 Register Counter;
11857 if (LastStage0Insts.empty()) {
11858 // use initial counter value (testing if the trip count is sufficient to
11859 // be executed by pipelined code)
11860 Counter = Init;
11861 if (IsUpdatePriorComp)
11862 Counter =
11863 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11864 } else {
11865 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11866 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11867 }
11868
11869 for (int I = 0; I <= TC; ++I) {
11870 Register NextCounter;
11871 NextCounter =
11872 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11873 AccCond = AccumulateCond(AccCond, CC);
11874 if (I != TC && Update != Comp)
11875 NextCounter =
11876 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11877 Counter = NextCounter;
11878 }
11879 }
11880
11881 // If AccCond == 0, the remainder is greater than TC.
11882 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11883 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11884 .addReg(AccCond)
11885 .addImm(0)
11886 .addImm(0);
11887 Cond.clear();
11889}
11890
11891static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11892 Register &RegMBB, Register &RegOther) {
11893 assert(Phi.getNumOperands() == 5);
11894 if (Phi.getOperand(2).getMBB() == MBB) {
11895 RegMBB = Phi.getOperand(1).getReg();
11896 RegOther = Phi.getOperand(3).getReg();
11897 } else {
11898 assert(Phi.getOperand(4).getMBB() == MBB);
11899 RegMBB = Phi.getOperand(3).getReg();
11900 RegOther = Phi.getOperand(1).getReg();
11901 }
11902}
11903
11905 if (!Reg.isVirtual())
11906 return false;
11907 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11908 return MRI.getVRegDef(Reg)->getParent() != BB;
11909}
11910
11911/// If Reg is an induction variable, return true and set some parameters
11912static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11913 MachineInstr *&UpdateInst,
11914 unsigned &UpdateCounterOprNum, Register &InitReg,
11915 bool &IsUpdatePriorComp) {
11916 // Example:
11917 //
11918 // Preheader:
11919 // InitReg = ...
11920 // LoopBB:
11921 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11922 // Reg = COPY Reg0 ; COPY is ignored.
11923 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11924 // ; Reg is the value calculated in the previous
11925 // ; iteration, so IsUpdatePriorComp == false.
11926
11927 if (LoopBB->pred_size() != 2)
11928 return false;
11929 if (!Reg.isVirtual())
11930 return false;
11931 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11932 UpdateInst = nullptr;
11933 UpdateCounterOprNum = 0;
11934 InitReg = 0;
11935 IsUpdatePriorComp = true;
11936 Register CurReg = Reg;
11937 while (true) {
11938 MachineInstr *Def = MRI.getVRegDef(CurReg);
11939 if (Def->getParent() != LoopBB)
11940 return false;
11941 if (Def->isCopy()) {
11942 // Ignore copy instructions unless they contain subregisters
11943 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11944 return false;
11945 CurReg = Def->getOperand(1).getReg();
11946 } else if (Def->isPHI()) {
11947 if (InitReg != 0)
11948 return false;
11949 if (!UpdateInst)
11950 IsUpdatePriorComp = false;
11951 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11952 } else {
11953 if (UpdateInst)
11954 return false;
11955 switch (Def->getOpcode()) {
11956 case AArch64::ADDSXri:
11957 case AArch64::ADDSWri:
11958 case AArch64::SUBSXri:
11959 case AArch64::SUBSWri:
11960 case AArch64::ADDXri:
11961 case AArch64::ADDWri:
11962 case AArch64::SUBXri:
11963 case AArch64::SUBWri:
11964 UpdateInst = Def;
11965 UpdateCounterOprNum = 1;
11966 break;
11967 case AArch64::ADDSXrr:
11968 case AArch64::ADDSWrr:
11969 case AArch64::SUBSXrr:
11970 case AArch64::SUBSWrr:
11971 case AArch64::ADDXrr:
11972 case AArch64::ADDWrr:
11973 case AArch64::SUBXrr:
11974 case AArch64::SUBWrr:
11975 UpdateInst = Def;
11976 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11977 UpdateCounterOprNum = 1;
11978 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11979 UpdateCounterOprNum = 2;
11980 else
11981 return false;
11982 break;
11983 default:
11984 return false;
11985 }
11986 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11987 }
11988
11989 if (!CurReg.isVirtual())
11990 return false;
11991 if (Reg == CurReg)
11992 break;
11993 }
11994
11995 if (!UpdateInst)
11996 return false;
11997
11998 return true;
11999}
12000
// Inspects the single-block loop LoopBB and, when it matches the pattern
// listed below, returns an AArch64PipelinerLoopInfo built from its conditional
// branch, compare instruction, counter update and initial counter register;
// in every other case it returns nullptr.
// NOTE(review): this listing has lost lines — the embedded numbering skips
// 12002, 12018, 12034 and 12041. The line carrying the function name and
// parameter list, the declarations of Cond and TRI, and the statement guarded
// by `if (TBB == LoopBB)` are therefore not visible here. Restore them from
// the original file; do not infer them from this listing.
12001std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
12003 // Accept loops that meet the following conditions
12004 // * The conditional branch is BCC
12005 // * The compare instruction is ADDS/SUBS/WHILEXX
12006 // * One operand of the compare is an induction variable and the other is a
12007 // loop invariant value
12008 // * The induction variable is incremented/decremented by a single instruction
12009 // * Does not contain CALL or instructions which have unmodeled side effects
12010
12011 for (MachineInstr &MI : *LoopBB)
12012 if (MI.isCall() || MI.hasUnmodeledSideEffects())
12013 // This instruction may use NZCV, which interferes with the instruction to
12014 // be inserted for loop control.
12015 return nullptr;
12016
12017 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
12019 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
12020 return nullptr;
12021
12022 // Infinite loops are not supported
12023 if (TBB == LoopBB && FBB == LoopBB)
12024 return nullptr;
12025
12026 // Must be conditional branch
12027 if (TBB != LoopBB && FBB == nullptr)
12028 return nullptr;
12029
12030 assert((TBB == LoopBB || FBB == LoopBB) &&
12031 "The Loop must be a single-basic-block loop");
12032
12033 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
12035
12036 if (CondBranch->getOpcode() != AArch64::Bcc)
12037 return nullptr;
12038
12039 // Normalization for createTripCountGreaterCondition()
12040 if (TBB == LoopBB)
12042
// Scan backwards from the end of the block for the last instruction that
// modifies NZCV; it must be one of the accepted compare opcodes.
12043 MachineInstr *Comp = nullptr;
12044 unsigned CompCounterOprNum = 0;
12045 for (MachineInstr &MI : reverse(*LoopBB)) {
12046 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
12047 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
12048 // operands is a loop invariant value
12049
12050 switch (MI.getOpcode()) {
12051 case AArch64::SUBSXri:
12052 case AArch64::SUBSWri:
12053 case AArch64::ADDSXri:
12054 case AArch64::ADDSWri:
12055 Comp = &MI;
12056 CompCounterOprNum = 1;
12057 break;
12058 case AArch64::ADDSWrr:
12059 case AArch64::ADDSXrr:
12060 case AArch64::SUBSWrr:
12061 case AArch64::SUBSXrr:
12062 Comp = &MI;
12063 break;
12064 default:
12065 if (isWhileOpcode(MI.getOpcode())) {
12066 Comp = &MI;
12067 break;
12068 }
12069 return nullptr;
12070 }
12071
// CompCounterOprNum is still 0 for the register-register and WHILE forms:
// pick the operand that is not defined outside the loop as the counter.
12072 if (CompCounterOprNum == 0) {
12073 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
12074 CompCounterOprNum = 2;
12075 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
12076 CompCounterOprNum = 1;
12077 else
12078 return nullptr;
12079 }
12080 break;
12081 }
12082 }
12083 if (!Comp)
12084 return nullptr;
12085
// The counter operand of the compare must be an induction variable.
12086 MachineInstr *Update = nullptr;
12087 Register Init;
12088 bool IsUpdatePriorComp;
12089 unsigned UpdateCounterOprNum;
12090 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
12091 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
12092 return nullptr;
12093
12094 return std::make_unique<AArch64PipelinerLoopInfo>(
12095 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
12096 Init, IsUpdatePriorComp, Cond);
12097}
12098
12099/// verifyInstruction - Perform target specific instruction verification.
/// Returns false and sets ErrInfo to a description when a check fails, and
/// true when every check passes.
// NOTE(review): this listing has lost lines — the embedded numbering skips
// 12122, 12128 and 12130, so the two `case` labels of the operand-type switch
// and one line of the second condition are not visible here. Restore them from
// the original file; do not infer them from this listing.
12100bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
12101 StringRef &ErrInfo) const {
12102 // Verify that immediate offsets on load/store instructions are within range.
12103 // Stack objects with an FI operand are excluded as they can be fixed up
12104 // during PEI.
12105 TypeSize Scale(0U, false), Width(0U, false);
12106 int64_t MinOffset, MaxOffset;
12107 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
12108 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
12109 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
12110 int64_t Imm = MI.getOperand(ImmIdx).getImm();
12111 if (Imm < MinOffset || Imm > MaxOffset) {
12112 ErrInfo = "Unexpected immediate on load/store instruction";
12113 return false;
12114 }
12115 }
12116 }
12117
// Check each operand declared in the instruction description against the
// constraint implied by its operand type.
12118 const MCInstrDesc &MCID = MI.getDesc();
12119 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
12120 const MachineOperand &MO = MI.getOperand(Op);
12121 switch (MCID.operands()[Op].OperandType) {
// NOTE(review): a `case` label is missing here (listing line 12122).
12123 if (!MO.isImm() || MO.getImm() != 0) {
12124 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
12125 return false;
12126 }
12127 break;
// NOTE(review): a `case` label (12128) and one condition line (12130) are
// missing from the check below.
12129 if (!MO.isImm() ||
12131 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
12132 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
12133 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
12134 return false;
12135 }
12136 break;
12137 default:
12138 break;
12139 }
12140 }
12141 return true;
12142}
12143
12144#define GET_INSTRINFO_HELPERS
12145#define GET_INSTRMAP_INFO
12146#include "AArch64GenInstrInfo.inc"
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isFrameStoreOpcode(int Opcode)
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static bool isFrameLoadOpcode(int Opcode)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI)
static bool isANDOpcode(MachineInstr &MI)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static bool mustAvoidNeonAtMBBI(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
Returns true if in a streaming call site region without SME-FA64.
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static std::optional< unsigned > getLFIInstSizeInBytes(const MachineInstr &MI)
Return the maximum number of bytes of code the specified instruction may be after LFI rewriting.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool isInStreamingCallSiteRegion(MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
Returns true if the instruction at I is in a streaming call site region, within a single basic block.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, RegState State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
SignReturnAddress getSignReturnAddressCondition() const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool shouldSignReturnAddress(SignReturnAddress Condition, bool IsLRSpilled)
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
static bool isZExtLoad(const MachineInstr &MI)
Returns whether the instruction is a zero-extending load.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that sets flags when possible.
void createPauthEpilogueInstr(MachineBasicBlock &MBB, DebugLoc DL) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if the given load or store may be paired with another.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSExtLoad(const MachineInstr &MI)
Returns whether the instruction is a sign-extending load.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
Get the first element.
Definition ArrayRef.h:144
size_t size() const
Get the array size.
Definition ArrayRef.h:141
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:126
bool empty() const
Definition DenseMap.h:173
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:688
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:685
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:66
bool usesWindowsCFI() const
Definition MCAsmInfo.h:674
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:615
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:657
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:630
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:727
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
static constexpr unsigned NoRegister
Definition MCRegister.h:60
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1554
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
bool def_empty(Register RegNo) const
def_empty - Return true if there are no instructions defining the specified register (it may be live-...
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents a location in source code.
Definition SMLoc.h:22
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
bool empty() const
Definition SmallSet.h:169
bool erase(const T &V)
Definition SmallSet.h:200
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:49
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:40
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:207
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
constexpr bool isLegalArithImmed(const uint64_t C)
isLegalArithImmed -
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getExtendType(unsigned Imm)
getExtendType - Extract the extend type for operands of arithmetic ops.
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void expandMOVAddr(unsigned Opcode, unsigned TargetFlags, bool IsTargetMachO, SmallVectorImpl< AddrInsnModel > &Insn)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
iterator end() const
Definition BasicBlock.h:89
LLVM_ABI Instruction & back() const
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:573
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
@ Renamable
Register that may be renamed.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
static bool isSEHInstruction(const MachineInstr &MI)
bool isLFIPrePostMemAccess(unsigned Opcode)
Returns true if Opcode is a pre- or post-indexed memory access that the LFI rewriter expands with a b...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr RegState getDefRegState(bool B)
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
static MCRegister getXRegFromWReg(MCRegister Reg)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:236
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
Definition InstrProf.h:147
bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
constexpr RegState getUndefRegState(bool B)
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.