1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
72static cl::opt<unsigned> TBZDisplacementBits(
73 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
76static cl::opt<unsigned> CBZDisplacementBits(
77 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
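// Illustrative usage (not part of this file): the options above are ordinary
// cl::opt flags, so they can be passed to llc to artificially shrink branch
// ranges and stress-test branch relaxation, for example:
//   llc -mtriple=aarch64 -aarch64-tbz-offset-bits=5 -aarch64-bcc-offset-bits=6 foo.ll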
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94 : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN,
95 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// GetInstSize - Return the number of bytes of code the specified
99/// instruction may occupy. This returns the maximum number of bytes.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101 const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(*MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134 // Size should preferably be set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
136 // Specific cases handle instructions of variable sizes
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
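// Worked example (sketch): for a plain data-processing instruction such as
// ADDXri, Desc.getSize() is 4 and is returned directly. A STACKMAP with 16
// requested patch bytes reports 16; a STATEPOINT with 0 patch bytes is
// assumed to lower to a normal 4-byte call.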
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
195 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
199 Size += getInstSizeInBytes(*I);
200 }
201 return Size;
202}
203
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205 SmallVectorImpl<MachineOperand> &Cond) {
206 // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 }
245}
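// Summary of the Cond encoding produced above (and consumed by
// instantiateCondBranch/insertSelect below):
//   Bcc:              Cond = { CC }
//   CBZ/CBNZ:         Cond = { -1, Opcode, Reg }
//   TBZ/TBNZ:         Cond = { -1, Opcode, Reg, BitNo }
//   CBWPri/CBXPrr/..: Cond = { -1, Opcode, CC, Op0, Op1 }
// For example, "tbnz w0, #3, bb" is parsed as { -1, TBNZW, w0, 3 } with
// Target = bb.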
246
247static unsigned getBranchDisplacementBits(unsigned Opc) {
248 switch (Opc) {
249 default:
250 llvm_unreachable("unexpected opcode!");
251 case AArch64::B:
252 return BDisplacementBits;
253 case AArch64::TBNZW:
254 case AArch64::TBZW:
255 case AArch64::TBNZX:
256 case AArch64::TBZX:
257 return TBZDisplacementBits;
258 case AArch64::CBNZW:
259 case AArch64::CBZW:
260 case AArch64::CBNZX:
261 case AArch64::CBZX:
262 return CBZDisplacementBits;
263 case AArch64::Bcc:
264 return BCCDisplacementBits;
265 case AArch64::CBWPri:
266 case AArch64::CBXPri:
267 case AArch64::CBWPrr:
268 case AArch64::CBXPrr:
269 return CBDisplacementBits;
270 }
271}
272
273bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
274 int64_t BrOffset) const {
275 unsigned Bits = getBranchDisplacementBits(BranchOp);
276 assert(Bits >= 3 && "max branch displacement must be enough to jump"
277 "over conditional branch expansion");
278 return isIntN(Bits, BrOffset / 4);
279}
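// Example: Bcc has 19 displacement bits by default; since offsets are counted
// in 4-byte units, the reachable range is about +/-(2^18)*4 = +/-1MiB. B with
// 26 bits reaches roughly +/-128MiB, while TB[N]Z with 14 bits only reaches
// about +/-32KiB.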
280
281MachineBasicBlock *
282AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
283 switch (MI.getOpcode()) {
284 default:
285 llvm_unreachable("unexpected opcode!");
286 case AArch64::B:
287 return MI.getOperand(0).getMBB();
288 case AArch64::TBZW:
289 case AArch64::TBNZW:
290 case AArch64::TBZX:
291 case AArch64::TBNZX:
292 return MI.getOperand(2).getMBB();
293 case AArch64::CBZW:
294 case AArch64::CBNZW:
295 case AArch64::CBZX:
296 case AArch64::CBNZX:
297 case AArch64::Bcc:
298 return MI.getOperand(1).getMBB();
299 case AArch64::CBWPri:
300 case AArch64::CBXPri:
301 case AArch64::CBWPrr:
302 case AArch64::CBXPrr:
303 return MI.getOperand(3).getMBB();
304 }
305}
306
307void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
308 MachineBasicBlock &NewDestBB,
309 MachineBasicBlock &RestoreBB,
310 const DebugLoc &DL,
311 int64_t BrOffset,
312 RegScavenger *RS) const {
313 assert(RS && "RegScavenger required for long branching");
314 assert(MBB.empty() &&
315 "new block should be inserted for expanding unconditional branch");
316 assert(MBB.pred_size() == 1);
317 assert(RestoreBB.empty() &&
318 "restore block should be inserted for restoring clobbered registers");
319
320 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
321 // Offsets outside of the signed 33-bit range are not supported for ADRP +
322 // ADD.
323 if (!isInt<33>(BrOffset))
324 report_fatal_error(
325 "Branch offsets outside of the signed 33-bit range not supported");
326
327 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
328 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
330 .addReg(Reg)
331 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
332 .addImm(0);
333 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
334 };
335
336 RS->enterBasicBlockEnd(MBB);
337 // If X16 is unused, we can rely on the linker to insert a range extension
338 // thunk if NewDestBB is out of range of a single B instruction.
339 constexpr Register Reg = AArch64::X16;
340 if (!RS->isRegUsed(Reg)) {
341 insertUnconditionalBranch(MBB, &NewDestBB, DL);
342 RS->setRegUsed(Reg);
343 return;
344 }
345
346 // If there's a free register and it's worth inflating the code size,
347 // manually insert the indirect branch.
348 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
349 if (Scavenged != AArch64::NoRegister &&
350 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
351 buildIndirectBranch(Scavenged, NewDestBB);
352 RS->setRegUsed(Scavenged);
353 return;
354 }
355
356 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
357 // with red zones.
358 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
359 if (!AFI || AFI->hasRedZone().value_or(true))
360 report_fatal_error(
361 "Unable to insert indirect branch inside function that has red zone");
362
363 // Otherwise, spill X16 and defer range extension to the linker.
364 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
365 .addReg(AArch64::SP, RegState::Define)
366 .addReg(Reg)
367 .addReg(AArch64::SP)
368 .addImm(-16);
369
370 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
371
372 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
373 .addReg(AArch64::SP, RegState::Define)
374 .addReg(Reg, RegState::Define)
375 .addReg(AArch64::SP)
376 .addImm(16);
377}
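// Sketch of the code emitted above when X16 is live and no other register can
// be scavenged (assuming the function has no red zone):
//   str x16, [sp, #-16]!   ; spill X16
//   b   RestoreBB          ; the linker may range-extend this via an X16 thunk
// RestoreBB:
//   ldr x16, [sp], #16     ; reload X16, then fall through to the destination
// When a free GPR is available in a cold section, an adrp/add/br sequence to
// the destination is emitted directly instead.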
378
379// Branch analysis.
380bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
381 MachineBasicBlock *&TBB,
382 MachineBasicBlock *&FBB,
383 SmallVectorImpl<MachineOperand> &Cond,
384 bool AllowModify) const {
385 // If the block has no terminators, it just falls into the block after it.
386 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
387 if (I == MBB.end())
388 return false;
389
390 // Skip over SpeculationBarrierEndBB terminators
391 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
392 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
393 --I;
394 }
395
396 if (!isUnpredicatedTerminator(*I))
397 return false;
398
399 // Get the last instruction in the block.
400 MachineInstr *LastInst = &*I;
401
402 // If there is only one terminator instruction, process it.
403 unsigned LastOpc = LastInst->getOpcode();
404 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
405 if (isUncondBranchOpcode(LastOpc)) {
406 TBB = LastInst->getOperand(0).getMBB();
407 return false;
408 }
409 if (isCondBranchOpcode(LastOpc)) {
410 // Block ends with fall-through condbranch.
411 parseCondBranch(LastInst, TBB, Cond);
412 return false;
413 }
414 return true; // Can't handle indirect branch.
415 }
416
417 // Get the instruction before it if it is a terminator.
418 MachineInstr *SecondLastInst = &*I;
419 unsigned SecondLastOpc = SecondLastInst->getOpcode();
420
421 // If AllowModify is true and the block ends with two or more unconditional
422 // branches, delete all but the first unconditional branch.
423 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
424 while (isUncondBranchOpcode(SecondLastOpc)) {
425 LastInst->eraseFromParent();
426 LastInst = SecondLastInst;
427 LastOpc = LastInst->getOpcode();
428 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
429 // Return now that the only terminator is an unconditional branch.
430 TBB = LastInst->getOperand(0).getMBB();
431 return false;
432 }
433 SecondLastInst = &*I;
434 SecondLastOpc = SecondLastInst->getOpcode();
435 }
436 }
437
438 // If we're allowed to modify and the block ends in an unconditional branch
439 // which could simply fallthrough, remove the branch. (Note: This case only
440 // matters when we can't understand the whole sequence, otherwise it's also
441 // handled by BranchFolding.cpp.)
442 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
443 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
444 LastInst->eraseFromParent();
445 LastInst = SecondLastInst;
446 LastOpc = LastInst->getOpcode();
447 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
448 assert(!isUncondBranchOpcode(LastOpc) &&
449 "unreachable unconditional branches removed above");
450
451 if (isCondBranchOpcode(LastOpc)) {
452 // Block ends with fall-through condbranch.
453 parseCondBranch(LastInst, TBB, Cond);
454 return false;
455 }
456 return true; // Can't handle indirect branch.
457 }
458 SecondLastInst = &*I;
459 SecondLastOpc = SecondLastInst->getOpcode();
460 }
461
462 // If there are three terminators, we don't know what sort of block this is.
463 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
464 return true;
465
466 // If the block ends with a B and a Bcc, handle it.
467 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
468 parseCondBranch(SecondLastInst, TBB, Cond);
469 FBB = LastInst->getOperand(0).getMBB();
470 return false;
471 }
472
473 // If the block ends with two unconditional branches, handle it. The second
474 // one is not executed, so remove it.
475 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
476 TBB = SecondLastInst->getOperand(0).getMBB();
477 I = LastInst;
478 if (AllowModify)
479 I->eraseFromParent();
480 return false;
481 }
482
483 // ...likewise if it ends with an indirect branch followed by an unconditional
484 // branch.
485 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
486 I = LastInst;
487 if (AllowModify)
488 I->eraseFromParent();
489 return true;
490 }
491
492 // Otherwise, can't handle this.
493 return true;
494}
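// Example of the contract implemented above: for a block ending in
//   b.eq %bb.1
//   b    %bb.2
// analyzeBranch returns false with TBB = %bb.1, FBB = %bb.2 and Cond = { EQ };
// for a lone fall-through "cbz x0, %bb.1" it returns TBB = %bb.1,
// FBB = nullptr and Cond = { -1, CBZX, x0 }.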
495
496bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
497 MachineBranchPredicate &MBP,
498 bool AllowModify) const {
499 // For the moment, handle only a block which ends with a cb(n)zx followed by
500 // a fallthrough. Why this? Because it is a common form.
501 // TODO: Should we handle b.cc?
502
503 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
504 if (I == MBB.end())
505 return true;
506
507 // Skip over SpeculationBarrierEndBB terminators
508 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
509 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
510 --I;
511 }
512
513 if (!isUnpredicatedTerminator(*I))
514 return true;
515
516 // Get the last instruction in the block.
517 MachineInstr *LastInst = &*I;
518 unsigned LastOpc = LastInst->getOpcode();
519 if (!isCondBranchOpcode(LastOpc))
520 return true;
521
522 switch (LastOpc) {
523 default:
524 return true;
525 case AArch64::CBZW:
526 case AArch64::CBZX:
527 case AArch64::CBNZW:
528 case AArch64::CBNZX:
529 break;
530 };
531
532 MBP.TrueDest = LastInst->getOperand(1).getMBB();
533 assert(MBP.TrueDest && "expected!");
534 MBP.FalseDest = MBB.getNextNode();
535
536 MBP.ConditionDef = nullptr;
537 MBP.SingleUseCondition = false;
538
539 MBP.LHS = LastInst->getOperand(0);
540 MBP.RHS = MachineOperand::CreateImm(0);
541 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
542 ? MachineBranchPredicate::PRED_NE
543 : MachineBranchPredicate::PRED_EQ;
544 return false;
545}
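// Example: for a block ending in "cbnz x0, %bb.3" followed by a fall-through,
// the predicate is reported as LHS = x0, RHS = 0, Predicate = PRED_NE,
// TrueDest = %bb.3 and FalseDest = the next block in layout order.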
546
547bool AArch64InstrInfo::reverseBranchCondition(
548 SmallVectorImpl<MachineOperand> &Cond) const {
549 if (Cond[0].getImm() != -1) {
550 // Regular Bcc
551 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
552 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
553 } else {
554 // Folded compare-and-branch
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown conditional branch!");
558 case AArch64::CBZW:
559 Cond[1].setImm(AArch64::CBNZW);
560 break;
561 case AArch64::CBNZW:
562 Cond[1].setImm(AArch64::CBZW);
563 break;
564 case AArch64::CBZX:
565 Cond[1].setImm(AArch64::CBNZX);
566 break;
567 case AArch64::CBNZX:
568 Cond[1].setImm(AArch64::CBZX);
569 break;
570 case AArch64::TBZW:
571 Cond[1].setImm(AArch64::TBNZW);
572 break;
573 case AArch64::TBNZW:
574 Cond[1].setImm(AArch64::TBZW);
575 break;
576 case AArch64::TBZX:
577 Cond[1].setImm(AArch64::TBNZX);
578 break;
579 case AArch64::TBNZX:
580 Cond[1].setImm(AArch64::TBZX);
581 break;
582
583 // Cond is { -1, Opcode, CC, Op0, Op1 }
584 case AArch64::CBWPri:
585 case AArch64::CBXPri:
586 case AArch64::CBWPrr:
587 case AArch64::CBXPrr: {
588 // Pseudos using standard 4-bit Arm condition codes
589 AArch64CC::CondCode CC =
590 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
591 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
592 }
593 }
594 }
595
596 return false;
597}
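// Example: reversing Cond = { -1, TBZW, w0, 3 } (i.e. "tbz w0, #3") yields
// { -1, TBNZW, w0, 3 }; a plain Bcc condition such as { GE } becomes { LT }.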
598
599unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
600 int *BytesRemoved) const {
601 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
602 if (I == MBB.end())
603 return 0;
604
605 if (!isUncondBranchOpcode(I->getOpcode()) &&
606 !isCondBranchOpcode(I->getOpcode()))
607 return 0;
608
609 // Remove the branch.
610 I->eraseFromParent();
611
612 I = MBB.end();
613
614 if (I == MBB.begin()) {
615 if (BytesRemoved)
616 *BytesRemoved = 4;
617 return 1;
618 }
619 --I;
620 if (!isCondBranchOpcode(I->getOpcode())) {
621 if (BytesRemoved)
622 *BytesRemoved = 4;
623 return 1;
624 }
625
626 // Remove the branch.
627 I->eraseFromParent();
628 if (BytesRemoved)
629 *BytesRemoved = 8;
630
631 return 2;
632}
633
634void AArch64InstrInfo::instantiateCondBranch(
635 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
636 ArrayRef<MachineOperand> Cond) const {
637 if (Cond[0].getImm() != -1) {
638 // Regular Bcc
639 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
640 } else {
641 // Folded compare-and-branch
642 // Note that we use addOperand instead of addReg to keep the flags.
643
644 // cbz, cbnz
645 const MachineInstrBuilder MIB =
646 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
647
648 // tbz/tbnz
649 if (Cond.size() > 3)
650 MIB.add(Cond[3]);
651
652 // cb
653 if (Cond.size() > 4)
654 MIB.add(Cond[4]);
655
656 MIB.addMBB(TBB);
657 }
658}
659
660unsigned AArch64InstrInfo::insertBranch(
661 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
662 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
663 // Shouldn't be a fall through.
664 assert(TBB && "insertBranch must not be told to insert a fallthrough");
665
666 if (!FBB) {
667 if (Cond.empty()) // Unconditional branch?
668 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
669 else
670 instantiateCondBranch(MBB, DL, TBB, Cond);
671
672 if (BytesAdded)
673 *BytesAdded = 4;
674
675 return 1;
676 }
677
678 // Two-way conditional branch.
679 instantiateCondBranch(MBB, DL, TBB, Cond);
680 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
681
682 if (BytesAdded)
683 *BytesAdded = 8;
684
685 return 2;
686}
687
688// Find the original register that VReg is copied from.
689static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690 while (Register::isVirtualRegister(VReg)) {
691 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
692 if (!DefMI->isFullCopy())
693 return VReg;
694 VReg = DefMI->getOperand(1).getReg();
695 }
696 return VReg;
697}
698
699// Determine if VReg is defined by an instruction that can be folded into a
700// csel instruction. If so, return the folded opcode, and the replacement
701// register.
702static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
703 unsigned *NewReg = nullptr) {
704 VReg = removeCopies(MRI, VReg);
705 if (!Register::isVirtualRegister(VReg))
706 return 0;
707
708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
709 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
710 unsigned Opc = 0;
711 unsigned SrcReg = 0;
712 switch (DefMI->getOpcode()) {
713 case AArch64::SUBREG_TO_REG:
714 // Check for the following way to define a 64-bit immediate:
715 // %0:gpr32 = MOVi32imm 1
716 // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
717 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
718 return 0;
719 if (!DefMI->getOperand(2).isReg())
720 return 0;
721 if (!DefMI->getOperand(3).isImm() ||
722 DefMI->getOperand(3).getImm() != AArch64::sub_32)
723 return 0;
724 DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
725 if (DefMI->getOpcode() != AArch64::MOVi32imm)
726 return 0;
727 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
728 return 0;
729 assert(Is64Bit);
730 SrcReg = AArch64::XZR;
731 Opc = AArch64::CSINCXr;
732 break;
733
734 case AArch64::MOVi32imm:
735 case AArch64::MOVi64imm:
736 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
737 return 0;
738 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
739 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
740 break;
741
742 case AArch64::ADDSXri:
743 case AArch64::ADDSWri:
744 // if NZCV is used, do not fold.
745 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
746 true) == -1)
747 return 0;
748 // fall-through to ADDXri and ADDWri.
749 [[fallthrough]];
750 case AArch64::ADDXri:
751 case AArch64::ADDWri:
752 // add x, 1 -> csinc.
753 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
754 DefMI->getOperand(3).getImm() != 0)
755 return 0;
756 SrcReg = DefMI->getOperand(1).getReg();
757 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
758 break;
759
760 case AArch64::ORNXrr:
761 case AArch64::ORNWrr: {
762 // not x -> csinv, represented as orn dst, xzr, src.
763 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
764 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
765 return 0;
766 SrcReg = DefMI->getOperand(2).getReg();
767 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
768 break;
769 }
770
771 case AArch64::SUBSXrr:
772 case AArch64::SUBSWrr:
773 // if NZCV is used, do not fold.
774 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
775 true) == -1)
776 return 0;
777 // fall-through to SUBXrr and SUBWrr.
778 [[fallthrough]];
779 case AArch64::SUBXrr:
780 case AArch64::SUBWrr: {
781 // neg x -> csneg, represented as sub dst, xzr, src.
782 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
783 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
784 return 0;
785 SrcReg = DefMI->getOperand(2).getReg();
786 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
787 break;
788 }
789 default:
790 return 0;
791 }
792 assert(Opc && SrcReg && "Missing parameters");
793
794 if (NewReg)
795 *NewReg = SrcReg;
796 return Opc;
797}
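// Examples of definitions recognised above (sketch):
//   %t = MOVi32imm 1         -> CSINCWr with NewReg = WZR  (constant 1)
//   %t = ADDXri %a, 1, 0     -> CSINCXr with NewReg = %a   (a + 1)
//   %t = ORNWrr $wzr, %a     -> CSINVWr with NewReg = %a   (~a)
//   %t = SUBXrr $xzr, %a     -> CSNEGXr with NewReg = %a   (-a)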
798
799bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
800 ArrayRef<MachineOperand> Cond,
801 Register DstReg, Register TrueReg,
802 Register FalseReg, int &CondCycles,
803 int &TrueCycles,
804 int &FalseCycles) const {
805 // Check register classes.
806 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
807 const TargetRegisterClass *RC =
808 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
809 if (!RC)
810 return false;
811
812 // Also need to check the dest regclass, in case we're trying to optimize
813 // something like:
814 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
815 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
816 return false;
817
818 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
819 unsigned ExtraCondLat = Cond.size() != 1;
820
821 // GPRs are handled by csel.
822 // FIXME: Fold in x+1, -x, and ~x when applicable.
823 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
824 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
825 // Single-cycle csel, csinc, csinv, and csneg.
826 CondCycles = 1 + ExtraCondLat;
827 TrueCycles = FalseCycles = 1;
828 if (canFoldIntoCSel(MRI, TrueReg))
829 TrueCycles = 0;
830 else if (canFoldIntoCSel(MRI, FalseReg))
831 FalseCycles = 0;
832 return true;
833 }
834
835 // Scalar floating point is handled by fcsel.
836 // FIXME: Form fabs, fmin, and fmax when applicable.
837 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
838 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
839 CondCycles = 5 + ExtraCondLat;
840 TrueCycles = FalseCycles = 2;
841 return true;
842 }
843
844 // Can't do vectors.
845 return false;
846}
847
848void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
849 MachineBasicBlock::iterator I,
850 const DebugLoc &DL, Register DstReg,
851 ArrayRef<MachineOperand> Cond,
852 Register TrueReg, Register FalseReg) const {
853 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
854
855 // Parse the condition code, see parseCondBranch() above.
856 AArch64CC::CondCode CC;
857 switch (Cond.size()) {
858 default:
859 llvm_unreachable("Unknown condition opcode in Cond");
860 case 1: // b.cc
861 CC = AArch64CC::CondCode(Cond[0].getImm());
862 break;
863 case 3: { // cbz/cbnz
864 // We must insert a compare against 0.
865 bool Is64Bit;
866 switch (Cond[1].getImm()) {
867 default:
868 llvm_unreachable("Unknown branch opcode in Cond");
869 case AArch64::CBZW:
870 Is64Bit = false;
871 CC = AArch64CC::EQ;
872 break;
873 case AArch64::CBZX:
874 Is64Bit = true;
875 CC = AArch64CC::EQ;
876 break;
877 case AArch64::CBNZW:
878 Is64Bit = false;
879 CC = AArch64CC::NE;
880 break;
881 case AArch64::CBNZX:
882 Is64Bit = true;
883 CC = AArch64CC::NE;
884 break;
885 }
886 Register SrcReg = Cond[2].getReg();
887 if (Is64Bit) {
888 // cmp reg, #0 is actually subs xzr, reg, #0.
889 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
890 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
891 .addReg(SrcReg)
892 .addImm(0)
893 .addImm(0);
894 } else {
895 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
896 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
897 .addReg(SrcReg)
898 .addImm(0)
899 .addImm(0);
900 }
901 break;
902 }
903 case 4: { // tbz/tbnz
904 // We must insert a tst instruction.
905 switch (Cond[1].getImm()) {
906 default:
907 llvm_unreachable("Unknown branch opcode in Cond");
908 case AArch64::TBZW:
909 case AArch64::TBZX:
910 CC = AArch64CC::EQ;
911 break;
912 case AArch64::TBNZW:
913 case AArch64::TBNZX:
914 CC = AArch64CC::NE;
915 break;
916 }
917 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
918 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
919 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
920 .addReg(Cond[2].getReg())
921 .addImm(
922 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
923 else
924 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
925 .addReg(Cond[2].getReg())
926 .addImm(
927 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
928 break;
929 }
930 case 5: { // cb
931 // We must insert a cmp, that is a subs
932 // 0 1 2 3 4
933 // Cond is { -1, Opcode, CC, Op0, Op1 }
934 unsigned SUBSOpC, SUBSDestReg;
935 bool IsImm = false;
936 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
937 switch (Cond[1].getImm()) {
938 default:
939 llvm_unreachable("Unknown branch opcode in Cond");
940 case AArch64::CBWPri:
941 SUBSOpC = AArch64::SUBSWri;
942 SUBSDestReg = AArch64::WZR;
943 IsImm = true;
944 break;
945 case AArch64::CBXPri:
946 SUBSOpC = AArch64::SUBSXri;
947 SUBSDestReg = AArch64::XZR;
948 IsImm = true;
949 break;
950 case AArch64::CBWPrr:
951 SUBSOpC = AArch64::SUBSWrr;
952 SUBSDestReg = AArch64::WZR;
953 IsImm = false;
954 break;
955 case AArch64::CBXPrr:
956 SUBSOpC = AArch64::SUBSXrr;
957 SUBSDestReg = AArch64::XZR;
958 IsImm = false;
959 break;
960 }
961
962 if (IsImm)
963 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
964 .addReg(Cond[3].getReg())
965 .addImm(Cond[4].getImm())
966 .addImm(0);
967 else
968 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
969 .addReg(Cond[3].getReg())
970 .addReg(Cond[4].getReg());
971 }
972 }
973
974 unsigned Opc = 0;
975 const TargetRegisterClass *RC = nullptr;
976 bool TryFold = false;
977 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
978 RC = &AArch64::GPR64RegClass;
979 Opc = AArch64::CSELXr;
980 TryFold = true;
981 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
982 RC = &AArch64::GPR32RegClass;
983 Opc = AArch64::CSELWr;
984 TryFold = true;
985 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
986 RC = &AArch64::FPR64RegClass;
987 Opc = AArch64::FCSELDrrr;
988 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
989 RC = &AArch64::FPR32RegClass;
990 Opc = AArch64::FCSELSrrr;
991 }
992 assert(RC && "Unsupported regclass");
993
994 // Try folding simple instructions into the csel.
995 if (TryFold) {
996 unsigned NewReg = 0;
997 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
998 if (FoldedOpc) {
999 // The folded opcodes csinc, csinv and csneg apply the operation to
1000 // FalseReg, so we need to invert the condition.
1001 CC = AArch64CC::getInvertedCondCode(CC);
1002 TrueReg = FalseReg;
1003 } else
1004 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1005
1006 // Fold the operation. Leave any dead instructions for DCE to clean up.
1007 if (FoldedOpc) {
1008 FalseReg = NewReg;
1009 Opc = FoldedOpc;
1010 // Extend the live range of NewReg.
1011 MRI.clearKillFlags(NewReg);
1012 }
1013 }
1014
1015 // Pull all virtual registers into the appropriate class.
1016 MRI.constrainRegClass(TrueReg, RC);
1017 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1018 assert(
1019 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1020 FalseReg == AArch64::XZR) &&
1021 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1022 if (FalseReg.isVirtual())
1023 MRI.constrainRegClass(FalseReg, RC);
1024
1025 // Insert the csel.
1026 BuildMI(MBB, I, DL, get(Opc), DstReg)
1027 .addReg(TrueReg)
1028 .addReg(FalseReg)
1029 .addImm(CC);
1030}
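// Example of the expansion performed above for Cond = { -1, CBZX, %x } with
// GPR64 operands (sketch):
//   $xzr = SUBSXri %x, 0, 0          ; materialise the compare against zero
//   %dst = CSELXr %true, %false, EQ
// If %true is defined by "ADDXri %a, 1, 0", the select is instead folded into
// "%dst = CSINCXr %false, %a, NE" with the condition inverted.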
1031
1032// Return true if Imm can be loaded into a register by a "cheap" sequence of
1033// instructions. For now, "cheap" means at most two instructions.
1034static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1035 if (BitSize == 32)
1036 return true;
1037
1038 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1039 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1040 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1041 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1042
1043 return Is.size() <= 2;
1044}
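// Example: 0x0000000012340000 expands to a single MOVZ (cheap), and
// 0x0123456700000000 to MOVZ+MOVK (still cheap), while an arbitrary 64-bit
// constant needing MOVZ plus three MOVKs is not considered cheap here.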
1045
1046// FIXME: this implementation should be micro-architecture dependent, so a
1047// micro-architecture target hook should be introduced here in future.
1048bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1049 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1050 if (isExynosCheapAsMove(MI))
1051 return true;
1052 return MI.isAsCheapAsAMove();
1053 }
1054
1055 switch (MI.getOpcode()) {
1056 default:
1057 return MI.isAsCheapAsAMove();
1058
1059 case AArch64::ADDWrs:
1060 case AArch64::ADDXrs:
1061 case AArch64::SUBWrs:
1062 case AArch64::SUBXrs:
1063 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1064
1065 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1066 // ORRXri, it is as cheap as MOV.
1067 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1068 case AArch64::MOVi32imm:
1069 return isCheapImmediate(MI, 32);
1070 case AArch64::MOVi64imm:
1071 return isCheapImmediate(MI, 64);
1072 }
1073}
1074
1075bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1076 switch (MI.getOpcode()) {
1077 default:
1078 return false;
1079
1080 case AArch64::ADDWrs:
1081 case AArch64::ADDXrs:
1082 case AArch64::ADDSWrs:
1083 case AArch64::ADDSXrs: {
1084 unsigned Imm = MI.getOperand(3).getImm();
1085 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1086 if (ShiftVal == 0)
1087 return true;
1088 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1089 }
1090
1091 case AArch64::ADDWrx:
1092 case AArch64::ADDXrx:
1093 case AArch64::ADDXrx64:
1094 case AArch64::ADDSWrx:
1095 case AArch64::ADDSXrx:
1096 case AArch64::ADDSXrx64: {
1097 unsigned Imm = MI.getOperand(3).getImm();
1098 switch (AArch64_AM::getArithExtendType(Imm)) {
1099 default:
1100 return false;
1101 case AArch64_AM::UXTB:
1102 case AArch64_AM::UXTH:
1103 case AArch64_AM::UXTW:
1104 case AArch64_AM::UXTX:
1105 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1106 }
1107 }
1108
1109 case AArch64::SUBWrs:
1110 case AArch64::SUBSWrs: {
1111 unsigned Imm = MI.getOperand(3).getImm();
1112 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1113 return ShiftVal == 0 ||
1114 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1115 }
1116
1117 case AArch64::SUBXrs:
1118 case AArch64::SUBSXrs: {
1119 unsigned Imm = MI.getOperand(3).getImm();
1120 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1121 return ShiftVal == 0 ||
1122 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1123 }
1124
1125 case AArch64::SUBWrx:
1126 case AArch64::SUBXrx:
1127 case AArch64::SUBXrx64:
1128 case AArch64::SUBSWrx:
1129 case AArch64::SUBSXrx:
1130 case AArch64::SUBSXrx64: {
1131 unsigned Imm = MI.getOperand(3).getImm();
1132 switch (AArch64_AM::getArithExtendType(Imm)) {
1133 default:
1134 return false;
1135 case AArch64_AM::UXTB:
1136 case AArch64_AM::UXTH:
1137 case AArch64_AM::UXTW:
1138 case AArch64_AM::UXTX:
1139 return AArch64_AM::getArithShiftValue(Imm) == 0;
1140 }
1141 }
1142
1143 case AArch64::LDRBBroW:
1144 case AArch64::LDRBBroX:
1145 case AArch64::LDRBroW:
1146 case AArch64::LDRBroX:
1147 case AArch64::LDRDroW:
1148 case AArch64::LDRDroX:
1149 case AArch64::LDRHHroW:
1150 case AArch64::LDRHHroX:
1151 case AArch64::LDRHroW:
1152 case AArch64::LDRHroX:
1153 case AArch64::LDRQroW:
1154 case AArch64::LDRQroX:
1155 case AArch64::LDRSBWroW:
1156 case AArch64::LDRSBWroX:
1157 case AArch64::LDRSBXroW:
1158 case AArch64::LDRSBXroX:
1159 case AArch64::LDRSHWroW:
1160 case AArch64::LDRSHWroX:
1161 case AArch64::LDRSHXroW:
1162 case AArch64::LDRSHXroX:
1163 case AArch64::LDRSWroW:
1164 case AArch64::LDRSWroX:
1165 case AArch64::LDRSroW:
1166 case AArch64::LDRSroX:
1167 case AArch64::LDRWroW:
1168 case AArch64::LDRWroX:
1169 case AArch64::LDRXroW:
1170 case AArch64::LDRXroX:
1171 case AArch64::PRFMroW:
1172 case AArch64::PRFMroX:
1173 case AArch64::STRBBroW:
1174 case AArch64::STRBBroX:
1175 case AArch64::STRBroW:
1176 case AArch64::STRBroX:
1177 case AArch64::STRDroW:
1178 case AArch64::STRDroX:
1179 case AArch64::STRHHroW:
1180 case AArch64::STRHHroX:
1181 case AArch64::STRHroW:
1182 case AArch64::STRHroX:
1183 case AArch64::STRQroW:
1184 case AArch64::STRQroX:
1185 case AArch64::STRSroW:
1186 case AArch64::STRSroX:
1187 case AArch64::STRWroW:
1188 case AArch64::STRWroX:
1189 case AArch64::STRXroW:
1190 case AArch64::STRXroX: {
1191 unsigned IsSigned = MI.getOperand(3).getImm();
1192 return !IsSigned;
1193 }
1194 }
1195}
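// Example: on Falkor, "add x0, x1, x2, lsl #4" is treated as fast (LSL with a
// shift amount of at most 5), whereas an LSR-shifted add or an LSL by 6 or
// more is not; register-offset loads/stores are fast only when the offset is
// not sign-extended.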
1196
1197bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1198 unsigned Opc = MI.getOpcode();
1199 switch (Opc) {
1200 default:
1201 return false;
1202 case AArch64::SEH_StackAlloc:
1203 case AArch64::SEH_SaveFPLR:
1204 case AArch64::SEH_SaveFPLR_X:
1205 case AArch64::SEH_SaveReg:
1206 case AArch64::SEH_SaveReg_X:
1207 case AArch64::SEH_SaveRegP:
1208 case AArch64::SEH_SaveRegP_X:
1209 case AArch64::SEH_SaveFReg:
1210 case AArch64::SEH_SaveFReg_X:
1211 case AArch64::SEH_SaveFRegP:
1212 case AArch64::SEH_SaveFRegP_X:
1213 case AArch64::SEH_SetFP:
1214 case AArch64::SEH_AddFP:
1215 case AArch64::SEH_Nop:
1216 case AArch64::SEH_PrologEnd:
1217 case AArch64::SEH_EpilogStart:
1218 case AArch64::SEH_EpilogEnd:
1219 case AArch64::SEH_PACSignLR:
1220 case AArch64::SEH_SaveAnyRegI:
1221 case AArch64::SEH_SaveAnyRegIP:
1222 case AArch64::SEH_SaveAnyRegQP:
1223 case AArch64::SEH_SaveAnyRegQPX:
1224 case AArch64::SEH_AllocZ:
1225 case AArch64::SEH_SaveZReg:
1226 case AArch64::SEH_SavePReg:
1227 return true;
1228 }
1229}
1230
1231bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1232 Register &SrcReg, Register &DstReg,
1233 unsigned &SubIdx) const {
1234 switch (MI.getOpcode()) {
1235 default:
1236 return false;
1237 case AArch64::SBFMXri: // aka sxtw
1238 case AArch64::UBFMXri: // aka uxtw
1239 // Check for the 32 -> 64 bit extension case, these instructions can do
1240 // much more.
1241 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1242 return false;
1243 // This is a signed or unsigned 32 -> 64 bit extension.
1244 SrcReg = MI.getOperand(1).getReg();
1245 DstReg = MI.getOperand(0).getReg();
1246 SubIdx = AArch64::sub_32;
1247 return true;
1248 }
1249}
1250
1251bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1252 const MachineInstr &MIa, const MachineInstr &MIb) const {
1253 const TargetRegisterInfo *TRI = &getRegisterInfo();
1254 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1255 int64_t OffsetA = 0, OffsetB = 0;
1256 TypeSize WidthA(0, false), WidthB(0, false);
1257 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1258
1259 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1260 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1261
1262 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1263 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1264 return false;
1265
1266 // Retrieve the base, offset from the base and width. Width
1267 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1268 // the bases are identical, and the offset of a lower memory access +
1269 // the width doesn't overlap the offset of a higher memory access,
1270 // then the memory accesses are different.
1271 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1272 // are assumed to have the same scale (vscale).
1273 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1274 WidthA, TRI) &&
1275 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1276 WidthB, TRI)) {
1277 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1278 OffsetAIsScalable == OffsetBIsScalable) {
1279 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1280 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1281 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1282 if (LowWidth.isScalable() == OffsetAIsScalable &&
1283 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1284 return true;
1285 }
1286 }
1287 return false;
1288}
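// Example: two stores off the same base, "str x1, [x0]" and "str x2, [x0, #8]",
// have identical base operands, offsets 0 and 8 and width 8, so the ranges
// [0,8) and [8,16) do not overlap and the accesses are reported as disjoint.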
1289
1290bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1291 const MachineBasicBlock *MBB,
1292 const MachineFunction &MF) const {
1293 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1294 return true;
1295
1296 // Do not move an instruction that can be recognized as a branch target.
1297 if (hasBTISemantics(MI))
1298 return true;
1299
1300 switch (MI.getOpcode()) {
1301 case AArch64::HINT:
1302 // CSDB hints are scheduling barriers.
1303 if (MI.getOperand(0).getImm() == 0x14)
1304 return true;
1305 break;
1306 case AArch64::DSB:
1307 case AArch64::ISB:
1308 // DSB and ISB also are scheduling barriers.
1309 return true;
1310 case AArch64::MSRpstatesvcrImm1:
1311 // SMSTART and SMSTOP are also scheduling barriers.
1312 return true;
1313 default:;
1314 }
1315 if (isSEHInstruction(MI))
1316 return true;
1317 auto Next = std::next(MI.getIterator());
1318 return Next != MBB->end() && Next->isCFIInstruction();
1319}
1320
1321/// analyzeCompare - For a comparison instruction, return the source registers
1322/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1323/// Return true if the comparison instruction can be analyzed.
1324bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1325 Register &SrcReg2, int64_t &CmpMask,
1326 int64_t &CmpValue) const {
1327 // The first operand can be a frame index where we'd normally expect a
1328 // register.
1329 // FIXME: Pass subregisters out of analyzeCompare
1330 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1331 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1332 return false;
1333
1334 switch (MI.getOpcode()) {
1335 default:
1336 break;
1337 case AArch64::PTEST_PP:
1338 case AArch64::PTEST_PP_ANY:
1339 case AArch64::PTEST_PP_FIRST:
1340 SrcReg = MI.getOperand(0).getReg();
1341 SrcReg2 = MI.getOperand(1).getReg();
1342 if (MI.getOperand(2).getSubReg())
1343 return false;
1344
1345 // Not sure about the mask and value for now...
1346 CmpMask = ~0;
1347 CmpValue = 0;
1348 return true;
1349 case AArch64::SUBSWrr:
1350 case AArch64::SUBSWrs:
1351 case AArch64::SUBSWrx:
1352 case AArch64::SUBSXrr:
1353 case AArch64::SUBSXrs:
1354 case AArch64::SUBSXrx:
1355 case AArch64::ADDSWrr:
1356 case AArch64::ADDSWrs:
1357 case AArch64::ADDSWrx:
1358 case AArch64::ADDSXrr:
1359 case AArch64::ADDSXrs:
1360 case AArch64::ADDSXrx:
1361 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1362 SrcReg = MI.getOperand(1).getReg();
1363 SrcReg2 = MI.getOperand(2).getReg();
1364
1365 // FIXME: Pass subregisters out of analyzeCompare
1366 if (MI.getOperand(2).getSubReg())
1367 return false;
1368
1369 CmpMask = ~0;
1370 CmpValue = 0;
1371 return true;
1372 case AArch64::SUBSWri:
1373 case AArch64::ADDSWri:
1374 case AArch64::SUBSXri:
1375 case AArch64::ADDSXri:
1376 SrcReg = MI.getOperand(1).getReg();
1377 SrcReg2 = 0;
1378 CmpMask = ~0;
1379 CmpValue = MI.getOperand(2).getImm();
1380 return true;
1381 case AArch64::ANDSWri:
1382 case AArch64::ANDSXri:
1383 // ANDS does not use the same encoding scheme as the other xxxS
1384 // instructions.
1385 SrcReg = MI.getOperand(1).getReg();
1386 SrcReg2 = 0;
1387 CmpMask = ~0;
1388 CmpValue = AArch64_AM::decodeLogicalImmediate(
1389 MI.getOperand(2).getImm(),
1390 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1391 return true;
1392 }
1393
1394 return false;
1395}
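// Example: for "$wzr = SUBSWri %w0, 42, 0" (i.e. "cmp w0, #42") the function
// returns SrcReg = %w0, SrcReg2 = 0, CmpMask = ~0 and CmpValue = 42; for
// ANDSWri/ANDSXri, CmpValue is the decoded logical immediate instead.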
1396
1397static bool UpdateOperandRegClass(MachineInstr &Instr) {
1398 MachineBasicBlock *MBB = Instr.getParent();
1399 assert(MBB && "Can't get MachineBasicBlock here");
1400 MachineFunction *MF = MBB->getParent();
1401 assert(MF && "Can't get MachineFunction here");
1402 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1403 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1404 MachineRegisterInfo *MRI = &MF->getRegInfo();
1405
1406 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1407 ++OpIdx) {
1408 MachineOperand &MO = Instr.getOperand(OpIdx);
1409 const TargetRegisterClass *OpRegCstraints =
1410 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1411
1412 // If there's no constraint, there's nothing to do.
1413 if (!OpRegCstraints)
1414 continue;
1415 // If the operand is a frame index, there's nothing to do here.
1416 // A frame index operand will resolve correctly during PEI.
1417 if (MO.isFI())
1418 continue;
1419
1420 assert(MO.isReg() &&
1421 "Operand has register constraints without being a register!");
1422
1423 Register Reg = MO.getReg();
1424 if (Reg.isPhysical()) {
1425 if (!OpRegCstraints->contains(Reg))
1426 return false;
1427 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1428 !MRI->constrainRegClass(Reg, OpRegCstraints))
1429 return false;
1430 }
1431
1432 return true;
1433}
1434
1435/// Return the opcode that does not set flags when possible - otherwise
1436/// return the original opcode. The caller is responsible for doing the actual
1437/// substitution and legality checking.
1438unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1439 // Don't convert all compare instructions, because for some the zero register
1440 // encoding becomes the sp register.
1441 bool MIDefinesZeroReg = false;
1442 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1443 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1444 MIDefinesZeroReg = true;
1445
1446 switch (MI.getOpcode()) {
1447 default:
1448 return MI.getOpcode();
1449 case AArch64::ADDSWrr:
1450 return AArch64::ADDWrr;
1451 case AArch64::ADDSWri:
1452 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1453 case AArch64::ADDSWrs:
1454 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1455 case AArch64::ADDSWrx:
1456 return AArch64::ADDWrx;
1457 case AArch64::ADDSXrr:
1458 return AArch64::ADDXrr;
1459 case AArch64::ADDSXri:
1460 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1461 case AArch64::ADDSXrs:
1462 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1463 case AArch64::ADDSXrx:
1464 return AArch64::ADDXrx;
1465 case AArch64::SUBSWrr:
1466 return AArch64::SUBWrr;
1467 case AArch64::SUBSWri:
1468 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1469 case AArch64::SUBSWrs:
1470 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1471 case AArch64::SUBSWrx:
1472 return AArch64::SUBWrx;
1473 case AArch64::SUBSXrr:
1474 return AArch64::SUBXrr;
1475 case AArch64::SUBSXri:
1476 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1477 case AArch64::SUBSXrs:
1478 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1479 case AArch64::SUBSXrx:
1480 return AArch64::SUBXrx;
1481 }
1482}
1483
1484enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1485
1486/// True when condition flags are accessed (either by writing or reading)
1487/// on the instruction trace starting at From and ending at To.
1488///
1489/// Note: If From and To are in different blocks, it's assumed the flags are accessed
1490/// on the path.
1491static bool areCFlagsAccessedBetweenInstrs(
1492 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1493 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1494 // Early exit if To is at the beginning of the BB.
1495 if (To == To->getParent()->begin())
1496 return true;
1497
1498 // Check whether the instructions are in the same basic block
1499 // If not, assume the condition flags might get modified somewhere.
1500 if (To->getParent() != From->getParent())
1501 return true;
1502
1503 // From must be above To.
1504 assert(std::any_of(
1505 ++To.getReverse(), To->getParent()->rend(),
1506 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1507
1508 // We iterate backward starting at \p To until we hit \p From.
1509 for (const MachineInstr &Instr :
1510 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1511 if (((AccessToCheck & AK_Write) &&
1512 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1513 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1514 return true;
1515 }
1516 return false;
1517}
1518
1519std::optional<unsigned>
1520AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1521 MachineInstr *Pred,
1522 const MachineRegisterInfo *MRI) const {
1523 unsigned MaskOpcode = Mask->getOpcode();
1524 unsigned PredOpcode = Pred->getOpcode();
1525 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1526 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1527
1528 if (PredIsWhileLike) {
1529 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1530 // instruction and the condition is "any" since WHILEcc does an implicit
1531 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1532 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1533 return PredOpcode;
1534
1535 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1536 // redundant since WHILE performs an implicit PTEST with an all active
1537 // mask.
1538 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1539 getElementSizeForOpcode(MaskOpcode) ==
1540 getElementSizeForOpcode(PredOpcode))
1541 return PredOpcode;
1542
1543 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1544 // WHILEcc performs an implicit PTEST with an all active mask, setting
1545 // the N flag as the PTEST_FIRST would.
1546 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1547 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1548 return PredOpcode;
1549
1550 return {};
1551 }
1552
1553 if (PredIsPTestLike) {
1554 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1555 // instruction that sets the flags as PTEST would and the condition is
1556 // "any" since PG is always a subset of the governing predicate of the
1557 // ptest-like instruction.
1558 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1559 return PredOpcode;
1560
1561 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1562
1563 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1564 // to look through a copy and try again. This is because some instructions
1565 // take a predicate whose register class is a subset of its result class.
1566 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1567 PTestLikeMask->getOperand(1).getReg().isVirtual())
1568 PTestLikeMask =
1569 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1570
1571 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if
1572 // the element size matches and either the PTEST_LIKE instruction uses
1573 // the same all active mask or the condition is "any".
1574 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1575 getElementSizeForOpcode(MaskOpcode) ==
1576 getElementSizeForOpcode(PredOpcode)) {
1577 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1578 return PredOpcode;
1579 }
1580
1581 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1582 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1583 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1584 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1585 // performed by the compare could consider fewer lanes for these element
1586 // sizes.
1587 //
1588 // For example, consider
1589 //
1590 // ptrue p0.b ; P0=1111-1111-1111-1111
1591 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1592 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1593 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1594 // ; ^ last active
1595 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1596 // ; ^ last active
1597 //
1598 // where the compare generates a canonical all active 32-bit predicate
1599 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1600 // active flag, whereas the PTEST instruction with the same mask doesn't.
1601 // For PTEST_ANY this doesn't apply as the flags in this case would be
1602 // identical regardless of element size.
1603 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1604 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1605 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1606 return PredOpcode;
1607
1608 return {};
1609 }
1610
1611 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1612 // opcode so the PTEST becomes redundant.
1613 switch (PredOpcode) {
1614 case AArch64::AND_PPzPP:
1615 case AArch64::BIC_PPzPP:
1616 case AArch64::EOR_PPzPP:
1617 case AArch64::NAND_PPzPP:
1618 case AArch64::NOR_PPzPP:
1619 case AArch64::ORN_PPzPP:
1620 case AArch64::ORR_PPzPP:
1621 case AArch64::BRKA_PPzP:
1622 case AArch64::BRKPA_PPzPP:
1623 case AArch64::BRKB_PPzP:
1624 case AArch64::BRKPB_PPzPP:
1625 case AArch64::RDFFR_PPz: {
1626 // Check to see if our mask is the same. If not the resulting flag bits
1627 // may be different and we can't remove the ptest.
1628 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1629 if (Mask != PredMask)
1630 return {};
1631 break;
1632 }
1633 case AArch64::BRKN_PPzP: {
1634 // BRKN uses an all active implicit mask to set flags unlike the other
1635 // flag-setting instructions.
1636 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1637 if ((MaskOpcode != AArch64::PTRUE_B) ||
1638 (Mask->getOperand(1).getImm() != 31))
1639 return {};
1640 break;
1641 }
1642 case AArch64::PTRUE_B:
1643 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1644 break;
1645 default:
1646 // Bail out if we don't recognize the input
1647 return {};
1648 }
1649
1650 return convertToFlagSettingOpc(PredOpcode);
1651}
1652
1653/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1654/// operation which could set the flags in an identical manner
1655bool AArch64InstrInfo::optimizePTestInstr(
1656 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1657 const MachineRegisterInfo *MRI) const {
1658 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1659 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1660
1661 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1662 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1663 // before the branch to extract each subregister.
1664 auto Op = Pred->getOperand(1);
1665 if (Op.isReg() && Op.getReg().isVirtual() &&
1666 Op.getSubReg() == AArch64::psub0)
1667 Pred = MRI->getUniqueVRegDef(Op.getReg());
1668 }
1669
1670 unsigned PredOpcode = Pred->getOpcode();
1671 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1672 if (!NewOp)
1673 return false;
1674
1675 const TargetRegisterInfo *TRI = &getRegisterInfo();
1676
1677 // If another instruction between Pred and PTest accesses flags, don't remove
1678 // the ptest or update the earlier instruction to modify them.
1679 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1680 return false;
1681
1682 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1683 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1684 // operand to be replaced with an equivalent instruction that also sets the
1685 // flags.
1686 PTest->eraseFromParent();
1687 if (*NewOp != PredOpcode) {
1688 Pred->setDesc(get(*NewOp));
1689 bool succeeded = UpdateOperandRegClass(*Pred);
1690 (void)succeeded;
1691 assert(succeeded && "Operands have incompatible register classes!");
1692 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1693 }
1694
1695 // Ensure that the flags def is live.
1696 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1697 unsigned i = 0, e = Pred->getNumOperands();
1698 for (; i != e; ++i) {
1699 MachineOperand &MO = Pred->getOperand(i);
1700 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1701 MO.setIsDead(false);
1702 break;
1703 }
1704 }
1705 }
1706 return true;
1707}
1708
1709/// Try to optimize a compare instruction. A compare instruction is an
1710/// instruction which produces AArch64::NZCV. It is a true compare instruction
1711/// when there are no uses of its destination register.
1713///
1714/// The following steps are tried in order:
1715/// 1. Convert CmpInstr into an unconditional version.
1716/// 2. Remove CmpInstr if above there is an instruction producing a needed
1717/// condition code or an instruction which can be converted into such an
1718/// instruction.
1719/// Only comparison with zero is supported.
1720bool AArch64InstrInfo::optimizeCompareInstr(
1721 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1722 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1723 assert(CmpInstr.getParent());
1724 assert(MRI);
1725
1726 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1727 int DeadNZCVIdx =
1728 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1729 if (DeadNZCVIdx != -1) {
1730 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1731 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1732 CmpInstr.eraseFromParent();
1733 return true;
1734 }
1735 unsigned Opc = CmpInstr.getOpcode();
1736 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1737 if (NewOpc == Opc)
1738 return false;
1739 const MCInstrDesc &MCID = get(NewOpc);
1740 CmpInstr.setDesc(MCID);
1741 CmpInstr.removeOperand(DeadNZCVIdx);
1742 bool succeeded = UpdateOperandRegClass(CmpInstr);
1743 (void)succeeded;
1744 assert(succeeded && "Some operands reg class are incompatible!");
1745 return true;
1746 }
1747
1748 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1749 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1750 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1751 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1752
1753 if (SrcReg2 != 0)
1754 return false;
1755
1756 // CmpInstr is a Compare instruction if destination register is not used.
1757 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1758 return false;
1759
1760 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1761 return true;
1762 return (CmpValue == 0 || CmpValue == 1) &&
1763 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1764}
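// Example (sketch): if the NZCV def of "%1 = SUBSWrr %a, %b" is dead, step 1
// above rewrites it to the non-flag-setting "%1 = SUBWrr %a, %b"; a compare
// against zero whose flags are only read by a following conditional can
// instead be removed entirely when an earlier instruction defining SrcReg can
// be converted to its flag-setting S-form.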
1765
1766/// Get opcode of S version of Instr.
1767/// If Instr is the S version, its opcode is returned.
1768/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1769/// or we are not interested in it.
1770static unsigned sForm(MachineInstr &Instr) {
1771 switch (Instr.getOpcode()) {
1772 default:
1773 return AArch64::INSTRUCTION_LIST_END;
1774
1775 case AArch64::ADDSWrr:
1776 case AArch64::ADDSWri:
1777 case AArch64::ADDSXrr:
1778 case AArch64::ADDSXri:
1779 case AArch64::SUBSWrr:
1780 case AArch64::SUBSWri:
1781 case AArch64::SUBSXrr:
1782 case AArch64::SUBSXri:
1783 return Instr.getOpcode();
1784
1785 case AArch64::ADDWrr:
1786 return AArch64::ADDSWrr;
1787 case AArch64::ADDWri:
1788 return AArch64::ADDSWri;
1789 case AArch64::ADDXrr:
1790 return AArch64::ADDSXrr;
1791 case AArch64::ADDXri:
1792 return AArch64::ADDSXri;
1793 case AArch64::ADCWr:
1794 return AArch64::ADCSWr;
1795 case AArch64::ADCXr:
1796 return AArch64::ADCSXr;
1797 case AArch64::SUBWrr:
1798 return AArch64::SUBSWrr;
1799 case AArch64::SUBWri:
1800 return AArch64::SUBSWri;
1801 case AArch64::SUBXrr:
1802 return AArch64::SUBSXrr;
1803 case AArch64::SUBXri:
1804 return AArch64::SUBSXri;
1805 case AArch64::SBCWr:
1806 return AArch64::SBCSWr;
1807 case AArch64::SBCXr:
1808 return AArch64::SBCSXr;
1809 case AArch64::ANDWri:
1810 return AArch64::ANDSWri;
1811 case AArch64::ANDXri:
1812 return AArch64::ANDSXri;
1813 }
1814}
1815
1816/// Check if AArch64::NZCV should be alive in successors of MBB.
1817static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1818 for (auto *BB : MBB->successors())
1819 if (BB->isLiveIn(AArch64::NZCV))
1820 return true;
1821 return false;
1822}
1823
1824/// \returns The condition code operand index for \p Instr if it is a branch
1825/// or select and -1 otherwise.
1826static int
1827findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1828 switch (Instr.getOpcode()) {
1829 default:
1830 return -1;
1831
1832 case AArch64::Bcc: {
1833 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1834 assert(Idx >= 2);
1835 return Idx - 2;
1836 }
1837
1838 case AArch64::CSINVWr:
1839 case AArch64::CSINVXr:
1840 case AArch64::CSINCWr:
1841 case AArch64::CSINCXr:
1842 case AArch64::CSELWr:
1843 case AArch64::CSELXr:
1844 case AArch64::CSNEGWr:
1845 case AArch64::CSNEGXr:
1846 case AArch64::FCSELSrrr:
1847 case AArch64::FCSELDrrr: {
1848 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1849 assert(Idx >= 1);
1850 return Idx - 1;
1851 }
1852 }
1853}
1854
1855/// Find a condition code used by the instruction.
1856/// Returns AArch64CC::Invalid if either the instruction does not use condition
1857/// codes or we don't optimize CmpInstr in the presence of such instructions.
1858static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1859 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1860 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1861 Instr.getOperand(CCIdx).getImm())
1862 : AArch64CC::Invalid;
1863}
1864
1867 UsedNZCV UsedFlags;
1868 switch (CC) {
1869 default:
1870 break;
1871
1872 case AArch64CC::EQ: // Z set
1873 case AArch64CC::NE: // Z clear
1874 UsedFlags.Z = true;
1875 break;
1876
1877 case AArch64CC::HI: // Z clear and C set
1878 case AArch64CC::LS: // Z set or C clear
1879 UsedFlags.Z = true;
1880 [[fallthrough]];
1881 case AArch64CC::HS: // C set
1882 case AArch64CC::LO: // C clear
1883 UsedFlags.C = true;
1884 break;
1885
1886 case AArch64CC::MI: // N set
1887 case AArch64CC::PL: // N clear
1888 UsedFlags.N = true;
1889 break;
1890
1891 case AArch64CC::VS: // V set
1892 case AArch64CC::VC: // V clear
1893 UsedFlags.V = true;
1894 break;
1895
1896 case AArch64CC::GT: // Z clear, N and V the same
1897 case AArch64CC::LE: // Z set, N and V differ
1898 UsedFlags.Z = true;
1899 [[fallthrough]];
1900 case AArch64CC::GE: // N and V the same
1901 case AArch64CC::LT: // N and V differ
1902 UsedFlags.N = true;
1903 UsedFlags.V = true;
1904 break;
1905 }
1906 return UsedFlags;
1907}
1908
1909/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1910/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1911/// \returns std::nullopt otherwise.
1912///
1913/// Collect instructions using those flags in \p CCUseInstrs if provided.
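///
/// Illustrative use (a sketch, not a specific call site):
/// \code
///   if (std::optional<UsedNZCV> Flags = examineCFlagsUse(MI, CmpInstr, TRI))
///     if (!Flags->C && !Flags->V)
///       ; // only Z and/or N are read after CmpInstr in this block
/// \endcode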
1914std::optional<UsedNZCV>
1916 const TargetRegisterInfo &TRI,
1917 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1918 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1919 if (MI.getParent() != CmpParent)
1920 return std::nullopt;
1921
1922 if (areCFlagsAliveInSuccessors(CmpParent))
1923 return std::nullopt;
1924
1925 UsedNZCV NZCVUsedAfterCmp;
1926 for (MachineInstr &Instr : instructionsWithoutDebug(
1927 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1928 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1929 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1930 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1931 return std::nullopt;
1932 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1933 if (CCUseInstrs)
1934 CCUseInstrs->push_back(&Instr);
1935 }
1936 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1937 break;
1938 }
1939 return NZCVUsedAfterCmp;
1940}
1941
1942static bool isADDSRegImm(unsigned Opcode) {
1943 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1944}
1945
1946static bool isSUBSRegImm(unsigned Opcode) {
1947 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1948}
1949
1950/// Check if CmpInstr can be substituted by MI.
1951///
1952/// CmpInstr can be substituted:
1953/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1954/// - and, MI and CmpInstr are from the same MachineBB
1955/// - and, condition flags are not alive in successors of the CmpInstr parent
1956/// - and, if MI opcode is the S form there must be no defs of flags between
1957/// MI and CmpInstr
1958/// or if MI opcode is not the S form there must be neither defs of flags
1959/// nor uses of flags between MI and CmpInstr.
1960/// - and, the C flag is not used after CmpInstr
1961///        and the V flag is either not used after CmpInstr or MI produces a
1962///        poison value on signed overflow (it carries the nsw flag).
1963static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1964 const TargetRegisterInfo &TRI) {
1965 // NOTE: this assertion guarantees that MI.getOpcode() is an add or a
1966 // subtraction that may or may not set flags.
1967 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1968
1969 const unsigned CmpOpcode = CmpInstr.getOpcode();
1970 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1971 return false;
1972
1973 assert((CmpInstr.getOperand(2).isImm() &&
1974 CmpInstr.getOperand(2).getImm() == 0) &&
1975 "Caller guarantees that CmpInstr compares with constant 0");
1976
1977 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1978 if (!NZVCUsed || NZVCUsed->C)
1979 return false;
1980
1981 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1982 // '%vreg = add ...' or '%vreg = sub ...'.
1983 // Condition flag V is used to indicate signed overflow.
1984 // 1) MI and CmpInstr set N and V to the same value.
1985 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1986 // signed overflow occurs, so CmpInstr could still be simplified away.
1987 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1988 return false;
1989
1990 AccessKind AccessToCheck = AK_Write;
1991 if (sForm(MI) != MI.getOpcode())
1992 AccessToCheck = AK_All;
1993 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1994}
1995
1996/// Substitute an instruction comparing to zero with another instruction
1997/// which produces needed condition flags.
1998///
1999/// Return true on success.
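///
/// Illustrative example (not from a real compilation):
/// \code
///   sub  w8, w9, #1
///   cmp  w8, #0       // no other NZCV access between the two instructions
///   b.eq .Ltarget
/// \endcode
/// becomes
/// \code
///   subs w8, w9, #1
///   b.eq .Ltarget
/// \endcode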
2000bool AArch64InstrInfo::substituteCmpToZero(
2001 MachineInstr &CmpInstr, unsigned SrcReg,
2002 const MachineRegisterInfo &MRI) const {
2003 // Get the unique definition of SrcReg.
2004 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2005 if (!MI)
2006 return false;
2007
2008 const TargetRegisterInfo &TRI = getRegisterInfo();
2009
2010 unsigned NewOpc = sForm(*MI);
2011 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2012 return false;
2013
2014 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2015 return false;
2016
2017 // Update the instruction to set NZCV.
2018 MI->setDesc(get(NewOpc));
2019 CmpInstr.eraseFromParent();
2020 bool succeeded = UpdateOperandRegClass(*MI);
2021 (void)succeeded;
2022 assert(succeeded && "Some operands reg class are incompatible!");
2023 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2024 return true;
2025}
2026
2027/// \returns True if \p CmpInstr can be removed.
2028///
2029/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2030/// codes used in \p CCUseInstrs must be inverted.
2031static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
2032 int CmpValue, const TargetRegisterInfo &TRI,
2033 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
2034 bool &IsInvertCC) {
2035 assert((CmpValue == 0 || CmpValue == 1) &&
2036 "Only comparisons to 0 or 1 considered for removal!");
2037
2038 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2039 unsigned MIOpc = MI.getOpcode();
2040 if (MIOpc == AArch64::CSINCWr) {
2041 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2042 MI.getOperand(2).getReg() != AArch64::WZR)
2043 return false;
2044 } else if (MIOpc == AArch64::CSINCXr) {
2045 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2046 MI.getOperand(2).getReg() != AArch64::XZR)
2047 return false;
2048 } else {
2049 return false;
2050 }
2051 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
2052 if (MICC == AArch64CC::Invalid)
2053 return false;
2054
2055 // NZCV needs to be defined
2056 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2057 return false;
2058
2059 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2060 const unsigned CmpOpcode = CmpInstr.getOpcode();
2061 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2062 if (CmpValue && !IsSubsRegImm)
2063 return false;
2064 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2065 return false;
2066
2067 // MI conditions allowed: eq, ne, mi, pl
2068 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2069 if (MIUsedNZCV.C || MIUsedNZCV.V)
2070 return false;
2071
2072 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2073 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2074 // Condition flags are not used in CmpInstr basic block successors, and only
2075 // the Z or N flags are allowed to be used after CmpInstr within its block.
2076 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2077 return false;
2078 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2079 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2080 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2081 return false;
2082 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2083 if (MIUsedNZCV.N && !CmpValue)
2084 return false;
2085
2086 // There must be no defs of flags between MI and CmpInstr
2087 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2088 return false;
2089
2090 // Condition code is inverted in the following cases:
2091 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2092 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2093 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2094 (!CmpValue && MICC == AArch64CC::NE);
2095 return true;
2096}
2097
2098/// Remove comparison in csinc-cmp sequence
2099///
2100/// Examples:
2101/// 1. \code
2102/// csinc w9, wzr, wzr, ne
2103/// cmp w9, #0
2104/// b.eq
2105/// \endcode
2106/// to
2107/// \code
2108/// csinc w9, wzr, wzr, ne
2109/// b.ne
2110/// \endcode
2111///
2112/// 2. \code
2113/// csinc x2, xzr, xzr, mi
2114/// cmp x2, #1
2115/// b.pl
2116/// \endcode
2117/// to
2118/// \code
2119/// csinc x2, xzr, xzr, mi
2120/// b.pl
2121/// \endcode
2122///
2123/// \param CmpInstr comparison instruction
2124/// \return True when comparison removed
2125bool AArch64InstrInfo::removeCmpToZeroOrOne(
2126 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2127 const MachineRegisterInfo &MRI) const {
2128 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2129 if (!MI)
2130 return false;
2131 const TargetRegisterInfo &TRI = getRegisterInfo();
2132 SmallVector<MachineInstr *, 4> CCUseInstrs;
2133 bool IsInvertCC = false;
2134 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2135 IsInvertCC))
2136 return false;
2137 // Make transformation
2138 CmpInstr.eraseFromParent();
2139 if (IsInvertCC) {
2140 // Invert condition codes in CmpInstr CC users
2141 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2142 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2143 assert(Idx >= 0 && "Unexpected instruction using CC.");
2144 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2145 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2146 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2147 CCOperand.setImm(CCUse);
2148 }
2149 }
2150 return true;
2151}
2152
2153bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2154 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2155 MI.getOpcode() != AArch64::CATCHRET)
2156 return false;
2157
2158 MachineBasicBlock &MBB = *MI.getParent();
2159 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2160 auto TRI = Subtarget.getRegisterInfo();
2161 DebugLoc DL = MI.getDebugLoc();
2162
2163 if (MI.getOpcode() == AArch64::CATCHRET) {
2164 // Skip to the first instruction before the epilog.
2165 const TargetInstrInfo *TII =
2166 MBB.getParent()->getSubtarget().getInstrInfo();
2167 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2168 auto MBBI = MachineBasicBlock::iterator(MI);
2169 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2170 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2171 FirstEpilogSEH != MBB.begin())
2172 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2173 if (FirstEpilogSEH != MBB.begin())
2174 FirstEpilogSEH = std::next(FirstEpilogSEH);
2175 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2176 .addReg(AArch64::X0, RegState::Define)
2177 .addMBB(TargetMBB);
2178 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2179 .addReg(AArch64::X0, RegState::Define)
2180 .addReg(AArch64::X0)
2181 .addMBB(TargetMBB)
2182 .addImm(0);
2183 TargetMBB->setMachineBlockAddressTaken();
2184 return true;
2185 }
2186
2187 Register Reg = MI.getOperand(0).getReg();
2188 const Module &M = *MBB.getParent()->getFunction().getParent();
2189 if (M.getStackProtectorGuard() == "sysreg") {
2190 const AArch64SysReg::SysReg *SrcReg =
2191 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2192 if (!SrcReg)
2193 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2194
2195 // mrs xN, sysreg
2196 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2197 .addDef(Reg)
2198 .addImm(SrcReg->Encoding);
2199 int Offset = M.getStackProtectorGuardOffset();
2200 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2201 // ldr xN, [xN, #offset]
2202 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2203 .addDef(Reg)
2204 .addUse(Reg, RegState::Kill)
2205 .addImm(Offset / 8);
2206 } else if (Offset >= -256 && Offset <= 255) {
2207 // ldur xN, [xN, #offset]
2208 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2209 .addDef(Reg)
2210 .addUse(Reg, RegState::Kill)
2211 .addImm(Offset);
2212 } else if (Offset >= -4095 && Offset <= 4095) {
2213 if (Offset > 0) {
2214 // add xN, xN, #offset
2215 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2216 .addDef(Reg)
2217 .addUse(Reg, RegState::Kill)
2218 .addImm(Offset)
2219 .addImm(0);
2220 } else {
2221 // sub xN, xN, #offset
2222 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2223 .addDef(Reg)
2224 .addUse(Reg, RegState::Kill)
2225 .addImm(-Offset)
2226 .addImm(0);
2227 }
2228 // ldr xN, [xN]
2229 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2230 .addDef(Reg)
2231 .addUse(Reg, RegState::Kill)
2232 .addImm(0);
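 // Illustrative result of this path (assuming a guard offset of 1001, which
 // is not directly encodable in an LDR or LDUR):
 //   mrs xN, <guard sysreg>
 //   add xN, xN, #1001
 //   ldr xN, [xN]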
2233 } else {
2234 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2235 // than 32760.
2236 // It might be nice to use AArch64::MOVi32imm here, which would get
2237 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2238 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2239 // AArch64FrameLowering might help us find such a scratch register
2240 // though. If we failed to find a scratch register, we could emit a
2241 // stream of add instructions to build up the immediate. Or, we could try
2242 // to insert a AArch64::MOVi32imm before register allocation so that we
2243 // didn't need to scavenge for a scratch register.
2244 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2245 }
2246 MBB.erase(MI);
2247 return true;
2248 }
2249
2250 const GlobalValue *GV =
2251 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2252 const TargetMachine &TM = MBB.getParent()->getTarget();
2253 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2254 const unsigned char MO_NC = AArch64II::MO_NC;
2255
2256 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2257 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2258 .addGlobalAddress(GV, 0, OpFlags);
2259 if (Subtarget.isTargetILP32()) {
2260 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2261 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2262 .addDef(Reg32, RegState::Dead)
2264 .addImm(0)
2265 .addMemOperand(*MI.memoperands_begin())
2267 } else {
2268 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2270 .addImm(0)
2271 .addMemOperand(*MI.memoperands_begin());
2272 }
2273 } else if (TM.getCodeModel() == CodeModel::Large) {
2274 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2275 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2276 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2277 .addImm(0);
2278 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2280 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2281 .addImm(16);
2282 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2284 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2285 .addImm(32);
2286 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2289 .addImm(48);
2290 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2292 .addImm(0)
2293 .addMemOperand(*MI.memoperands_begin());
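 // Illustrative expansion for the large code model (a sketch, using x0 as the
 // destination register and assuming GV is __stack_chk_guard):
 //   movz x0, #:abs_g0_nc:__stack_chk_guard
 //   movk x0, #:abs_g1_nc:__stack_chk_guard, lsl #16
 //   movk x0, #:abs_g2_nc:__stack_chk_guard, lsl #32
 //   movk x0, #:abs_g3:__stack_chk_guard, lsl #48
 //   ldr  x0, [x0]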
2294 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2295 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2296 .addGlobalAddress(GV, 0, OpFlags);
2297 } else {
2298 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2299 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2300 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2301 if (Subtarget.isTargetILP32()) {
2302 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2303 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2304 .addDef(Reg32, RegState::Dead)
2306 .addGlobalAddress(GV, 0, LoFlags)
2307 .addMemOperand(*MI.memoperands_begin())
2309 } else {
2310 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2312 .addGlobalAddress(GV, 0, LoFlags)
2313 .addMemOperand(*MI.memoperands_begin());
2314 }
2315 }
2316
2317 MBB.erase(MI);
2318
2319 return true;
2320}
2321
2322// Return true if this instruction simply sets its single destination register
2323// to zero. This is equivalent to a register rename of the zero-register.
2324bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2325 switch (MI.getOpcode()) {
2326 default:
2327 break;
2328 case AArch64::MOVZWi:
2329 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2330 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2331 assert(MI.getDesc().getNumOperands() == 3 &&
2332 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2333 return true;
2334 }
2335 break;
2336 case AArch64::ANDWri: // and Rd, Rzr, #imm
2337 return MI.getOperand(1).getReg() == AArch64::WZR;
2338 case AArch64::ANDXri:
2339 return MI.getOperand(1).getReg() == AArch64::XZR;
2340 case TargetOpcode::COPY:
2341 return MI.getOperand(1).getReg() == AArch64::WZR;
2342 }
2343 return false;
2344}
2345
2346// Return true if this instruction simply renames a general register without
2347// modifying bits.
2348bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2349 switch (MI.getOpcode()) {
2350 default:
2351 break;
2352 case TargetOpcode::COPY: {
2353 // GPR32 copies will by lowered to ORRXrs
2354 Register DstReg = MI.getOperand(0).getReg();
2355 return (AArch64::GPR32RegClass.contains(DstReg) ||
2356 AArch64::GPR64RegClass.contains(DstReg));
2357 }
2358 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2359 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2360 assert(MI.getDesc().getNumOperands() == 4 &&
2361 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2362 return true;
2363 }
2364 break;
2365 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2366 if (MI.getOperand(2).getImm() == 0) {
2367 assert(MI.getDesc().getNumOperands() == 4 &&
2368 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2369 return true;
2370 }
2371 break;
2372 }
2373 return false;
2374}
2375
2376// Return true if this instruction simply renames a floating-point register
2377// without modifying bits.
2378bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2379 switch (MI.getOpcode()) {
2380 default:
2381 break;
2382 case TargetOpcode::COPY: {
2383 Register DstReg = MI.getOperand(0).getReg();
2384 return AArch64::FPR128RegClass.contains(DstReg);
2385 }
2386 case AArch64::ORRv16i8:
2387 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2388 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2389 "invalid ORRv16i8 operands");
2390 return true;
2391 }
2392 break;
2393 }
2394 return false;
2395}
2396
2397Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2398 int &FrameIndex) const {
2399 switch (MI.getOpcode()) {
2400 default:
2401 break;
2402 case AArch64::LDRWui:
2403 case AArch64::LDRXui:
2404 case AArch64::LDRBui:
2405 case AArch64::LDRHui:
2406 case AArch64::LDRSui:
2407 case AArch64::LDRDui:
2408 case AArch64::LDRQui:
2409 case AArch64::LDR_PXI:
2410 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2411 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2412 FrameIndex = MI.getOperand(1).getIndex();
2413 return MI.getOperand(0).getReg();
2414 }
2415 break;
2416 }
2417
2418 return 0;
2419}
2420
2421Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2422 int &FrameIndex) const {
2423 switch (MI.getOpcode()) {
2424 default:
2425 break;
2426 case AArch64::STRWui:
2427 case AArch64::STRXui:
2428 case AArch64::STRBui:
2429 case AArch64::STRHui:
2430 case AArch64::STRSui:
2431 case AArch64::STRDui:
2432 case AArch64::STRQui:
2433 case AArch64::STR_PXI:
2434 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2435 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2436 FrameIndex = MI.getOperand(1).getIndex();
2437 return MI.getOperand(0).getReg();
2438 }
2439 break;
2440 }
2441 return 0;
2442}
2443
2444/// Check all MachineMemOperands for a hint to suppress pairing.
2445bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2446 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2447 return MMO->getFlags() & MOSuppressPair;
2448 });
2449}
2450
2451/// Set a flag on the first MachineMemOperand to suppress pairing.
2452void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2453 if (MI.memoperands_empty())
2454 return;
2455 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2456}
2457
2458/// Check all MachineMemOperands for a hint that the load/store is strided.
2459bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2460 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2461 return MMO->getFlags() & MOStridedAccess;
2462 });
2463}
2464
2465bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2466 switch (Opc) {
2467 default:
2468 return false;
2469 case AArch64::STURSi:
2470 case AArch64::STRSpre:
2471 case AArch64::STURDi:
2472 case AArch64::STRDpre:
2473 case AArch64::STURQi:
2474 case AArch64::STRQpre:
2475 case AArch64::STURBBi:
2476 case AArch64::STURHHi:
2477 case AArch64::STURWi:
2478 case AArch64::STRWpre:
2479 case AArch64::STURXi:
2480 case AArch64::STRXpre:
2481 case AArch64::LDURSi:
2482 case AArch64::LDRSpre:
2483 case AArch64::LDURDi:
2484 case AArch64::LDRDpre:
2485 case AArch64::LDURQi:
2486 case AArch64::LDRQpre:
2487 case AArch64::LDURWi:
2488 case AArch64::LDRWpre:
2489 case AArch64::LDURXi:
2490 case AArch64::LDRXpre:
2491 case AArch64::LDRSWpre:
2492 case AArch64::LDURSWi:
2493 case AArch64::LDURHHi:
2494 case AArch64::LDURBBi:
2495 case AArch64::LDURSBWi:
2496 case AArch64::LDURSHWi:
2497 return true;
2498 }
2499}
2500
2501std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2502 switch (Opc) {
2503 default: return {};
2504 case AArch64::PRFMui: return AArch64::PRFUMi;
2505 case AArch64::LDRXui: return AArch64::LDURXi;
2506 case AArch64::LDRWui: return AArch64::LDURWi;
2507 case AArch64::LDRBui: return AArch64::LDURBi;
2508 case AArch64::LDRHui: return AArch64::LDURHi;
2509 case AArch64::LDRSui: return AArch64::LDURSi;
2510 case AArch64::LDRDui: return AArch64::LDURDi;
2511 case AArch64::LDRQui: return AArch64::LDURQi;
2512 case AArch64::LDRBBui: return AArch64::LDURBBi;
2513 case AArch64::LDRHHui: return AArch64::LDURHHi;
2514 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2515 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2516 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2517 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2518 case AArch64::LDRSWui: return AArch64::LDURSWi;
2519 case AArch64::STRXui: return AArch64::STURXi;
2520 case AArch64::STRWui: return AArch64::STURWi;
2521 case AArch64::STRBui: return AArch64::STURBi;
2522 case AArch64::STRHui: return AArch64::STURHi;
2523 case AArch64::STRSui: return AArch64::STURSi;
2524 case AArch64::STRDui: return AArch64::STURDi;
2525 case AArch64::STRQui: return AArch64::STURQi;
2526 case AArch64::STRBBui: return AArch64::STURBBi;
2527 case AArch64::STRHHui: return AArch64::STURHHi;
2528 }
2529}
2530
2531unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2532 switch (Opc) {
2533 default:
2534 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2535 case AArch64::ADDG:
2536 case AArch64::LDAPURBi:
2537 case AArch64::LDAPURHi:
2538 case AArch64::LDAPURi:
2539 case AArch64::LDAPURSBWi:
2540 case AArch64::LDAPURSBXi:
2541 case AArch64::LDAPURSHWi:
2542 case AArch64::LDAPURSHXi:
2543 case AArch64::LDAPURSWi:
2544 case AArch64::LDAPURXi:
2545 case AArch64::LDR_PPXI:
2546 case AArch64::LDR_PXI:
2547 case AArch64::LDR_ZXI:
2548 case AArch64::LDR_ZZXI:
2549 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2550 case AArch64::LDR_ZZZXI:
2551 case AArch64::LDR_ZZZZXI:
2552 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2553 case AArch64::LDRBBui:
2554 case AArch64::LDRBui:
2555 case AArch64::LDRDui:
2556 case AArch64::LDRHHui:
2557 case AArch64::LDRHui:
2558 case AArch64::LDRQui:
2559 case AArch64::LDRSBWui:
2560 case AArch64::LDRSBXui:
2561 case AArch64::LDRSHWui:
2562 case AArch64::LDRSHXui:
2563 case AArch64::LDRSui:
2564 case AArch64::LDRSWui:
2565 case AArch64::LDRWui:
2566 case AArch64::LDRXui:
2567 case AArch64::LDURBBi:
2568 case AArch64::LDURBi:
2569 case AArch64::LDURDi:
2570 case AArch64::LDURHHi:
2571 case AArch64::LDURHi:
2572 case AArch64::LDURQi:
2573 case AArch64::LDURSBWi:
2574 case AArch64::LDURSBXi:
2575 case AArch64::LDURSHWi:
2576 case AArch64::LDURSHXi:
2577 case AArch64::LDURSi:
2578 case AArch64::LDURSWi:
2579 case AArch64::LDURWi:
2580 case AArch64::LDURXi:
2581 case AArch64::PRFMui:
2582 case AArch64::PRFUMi:
2583 case AArch64::ST2Gi:
2584 case AArch64::STGi:
2585 case AArch64::STLURBi:
2586 case AArch64::STLURHi:
2587 case AArch64::STLURWi:
2588 case AArch64::STLURXi:
2589 case AArch64::StoreSwiftAsyncContext:
2590 case AArch64::STR_PPXI:
2591 case AArch64::STR_PXI:
2592 case AArch64::STR_ZXI:
2593 case AArch64::STR_ZZXI:
2594 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2595 case AArch64::STR_ZZZXI:
2596 case AArch64::STR_ZZZZXI:
2597 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2598 case AArch64::STRBBui:
2599 case AArch64::STRBui:
2600 case AArch64::STRDui:
2601 case AArch64::STRHHui:
2602 case AArch64::STRHui:
2603 case AArch64::STRQui:
2604 case AArch64::STRSui:
2605 case AArch64::STRWui:
2606 case AArch64::STRXui:
2607 case AArch64::STURBBi:
2608 case AArch64::STURBi:
2609 case AArch64::STURDi:
2610 case AArch64::STURHHi:
2611 case AArch64::STURHi:
2612 case AArch64::STURQi:
2613 case AArch64::STURSi:
2614 case AArch64::STURWi:
2615 case AArch64::STURXi:
2616 case AArch64::STZ2Gi:
2617 case AArch64::STZGi:
2618 case AArch64::TAGPstack:
2619 return 2;
2620 case AArch64::LD1B_D_IMM:
2621 case AArch64::LD1B_H_IMM:
2622 case AArch64::LD1B_IMM:
2623 case AArch64::LD1B_S_IMM:
2624 case AArch64::LD1D_IMM:
2625 case AArch64::LD1H_D_IMM:
2626 case AArch64::LD1H_IMM:
2627 case AArch64::LD1H_S_IMM:
2628 case AArch64::LD1RB_D_IMM:
2629 case AArch64::LD1RB_H_IMM:
2630 case AArch64::LD1RB_IMM:
2631 case AArch64::LD1RB_S_IMM:
2632 case AArch64::LD1RD_IMM:
2633 case AArch64::LD1RH_D_IMM:
2634 case AArch64::LD1RH_IMM:
2635 case AArch64::LD1RH_S_IMM:
2636 case AArch64::LD1RSB_D_IMM:
2637 case AArch64::LD1RSB_H_IMM:
2638 case AArch64::LD1RSB_S_IMM:
2639 case AArch64::LD1RSH_D_IMM:
2640 case AArch64::LD1RSH_S_IMM:
2641 case AArch64::LD1RSW_IMM:
2642 case AArch64::LD1RW_D_IMM:
2643 case AArch64::LD1RW_IMM:
2644 case AArch64::LD1SB_D_IMM:
2645 case AArch64::LD1SB_H_IMM:
2646 case AArch64::LD1SB_S_IMM:
2647 case AArch64::LD1SH_D_IMM:
2648 case AArch64::LD1SH_S_IMM:
2649 case AArch64::LD1SW_D_IMM:
2650 case AArch64::LD1W_D_IMM:
2651 case AArch64::LD1W_IMM:
2652 case AArch64::LD2B_IMM:
2653 case AArch64::LD2D_IMM:
2654 case AArch64::LD2H_IMM:
2655 case AArch64::LD2W_IMM:
2656 case AArch64::LD3B_IMM:
2657 case AArch64::LD3D_IMM:
2658 case AArch64::LD3H_IMM:
2659 case AArch64::LD3W_IMM:
2660 case AArch64::LD4B_IMM:
2661 case AArch64::LD4D_IMM:
2662 case AArch64::LD4H_IMM:
2663 case AArch64::LD4W_IMM:
2664 case AArch64::LDG:
2665 case AArch64::LDNF1B_D_IMM:
2666 case AArch64::LDNF1B_H_IMM:
2667 case AArch64::LDNF1B_IMM:
2668 case AArch64::LDNF1B_S_IMM:
2669 case AArch64::LDNF1D_IMM:
2670 case AArch64::LDNF1H_D_IMM:
2671 case AArch64::LDNF1H_IMM:
2672 case AArch64::LDNF1H_S_IMM:
2673 case AArch64::LDNF1SB_D_IMM:
2674 case AArch64::LDNF1SB_H_IMM:
2675 case AArch64::LDNF1SB_S_IMM:
2676 case AArch64::LDNF1SH_D_IMM:
2677 case AArch64::LDNF1SH_S_IMM:
2678 case AArch64::LDNF1SW_D_IMM:
2679 case AArch64::LDNF1W_D_IMM:
2680 case AArch64::LDNF1W_IMM:
2681 case AArch64::LDNPDi:
2682 case AArch64::LDNPQi:
2683 case AArch64::LDNPSi:
2684 case AArch64::LDNPWi:
2685 case AArch64::LDNPXi:
2686 case AArch64::LDNT1B_ZRI:
2687 case AArch64::LDNT1D_ZRI:
2688 case AArch64::LDNT1H_ZRI:
2689 case AArch64::LDNT1W_ZRI:
2690 case AArch64::LDPDi:
2691 case AArch64::LDPQi:
2692 case AArch64::LDPSi:
2693 case AArch64::LDPWi:
2694 case AArch64::LDPXi:
2695 case AArch64::LDRBBpost:
2696 case AArch64::LDRBBpre:
2697 case AArch64::LDRBpost:
2698 case AArch64::LDRBpre:
2699 case AArch64::LDRDpost:
2700 case AArch64::LDRDpre:
2701 case AArch64::LDRHHpost:
2702 case AArch64::LDRHHpre:
2703 case AArch64::LDRHpost:
2704 case AArch64::LDRHpre:
2705 case AArch64::LDRQpost:
2706 case AArch64::LDRQpre:
2707 case AArch64::LDRSpost:
2708 case AArch64::LDRSpre:
2709 case AArch64::LDRWpost:
2710 case AArch64::LDRWpre:
2711 case AArch64::LDRXpost:
2712 case AArch64::LDRXpre:
2713 case AArch64::ST1B_D_IMM:
2714 case AArch64::ST1B_H_IMM:
2715 case AArch64::ST1B_IMM:
2716 case AArch64::ST1B_S_IMM:
2717 case AArch64::ST1D_IMM:
2718 case AArch64::ST1H_D_IMM:
2719 case AArch64::ST1H_IMM:
2720 case AArch64::ST1H_S_IMM:
2721 case AArch64::ST1W_D_IMM:
2722 case AArch64::ST1W_IMM:
2723 case AArch64::ST2B_IMM:
2724 case AArch64::ST2D_IMM:
2725 case AArch64::ST2H_IMM:
2726 case AArch64::ST2W_IMM:
2727 case AArch64::ST3B_IMM:
2728 case AArch64::ST3D_IMM:
2729 case AArch64::ST3H_IMM:
2730 case AArch64::ST3W_IMM:
2731 case AArch64::ST4B_IMM:
2732 case AArch64::ST4D_IMM:
2733 case AArch64::ST4H_IMM:
2734 case AArch64::ST4W_IMM:
2735 case AArch64::STGPi:
2736 case AArch64::STGPreIndex:
2737 case AArch64::STZGPreIndex:
2738 case AArch64::ST2GPreIndex:
2739 case AArch64::STZ2GPreIndex:
2740 case AArch64::STGPostIndex:
2741 case AArch64::STZGPostIndex:
2742 case AArch64::ST2GPostIndex:
2743 case AArch64::STZ2GPostIndex:
2744 case AArch64::STNPDi:
2745 case AArch64::STNPQi:
2746 case AArch64::STNPSi:
2747 case AArch64::STNPWi:
2748 case AArch64::STNPXi:
2749 case AArch64::STNT1B_ZRI:
2750 case AArch64::STNT1D_ZRI:
2751 case AArch64::STNT1H_ZRI:
2752 case AArch64::STNT1W_ZRI:
2753 case AArch64::STPDi:
2754 case AArch64::STPQi:
2755 case AArch64::STPSi:
2756 case AArch64::STPWi:
2757 case AArch64::STPXi:
2758 case AArch64::STRBBpost:
2759 case AArch64::STRBBpre:
2760 case AArch64::STRBpost:
2761 case AArch64::STRBpre:
2762 case AArch64::STRDpost:
2763 case AArch64::STRDpre:
2764 case AArch64::STRHHpost:
2765 case AArch64::STRHHpre:
2766 case AArch64::STRHpost:
2767 case AArch64::STRHpre:
2768 case AArch64::STRQpost:
2769 case AArch64::STRQpre:
2770 case AArch64::STRSpost:
2771 case AArch64::STRSpre:
2772 case AArch64::STRWpost:
2773 case AArch64::STRWpre:
2774 case AArch64::STRXpost:
2775 case AArch64::STRXpre:
2776 return 3;
2777 case AArch64::LDPDpost:
2778 case AArch64::LDPDpre:
2779 case AArch64::LDPQpost:
2780 case AArch64::LDPQpre:
2781 case AArch64::LDPSpost:
2782 case AArch64::LDPSpre:
2783 case AArch64::LDPWpost:
2784 case AArch64::LDPWpre:
2785 case AArch64::LDPXpost:
2786 case AArch64::LDPXpre:
2787 case AArch64::STGPpre:
2788 case AArch64::STGPpost:
2789 case AArch64::STPDpost:
2790 case AArch64::STPDpre:
2791 case AArch64::STPQpost:
2792 case AArch64::STPQpre:
2793 case AArch64::STPSpost:
2794 case AArch64::STPSpre:
2795 case AArch64::STPWpost:
2796 case AArch64::STPWpre:
2797 case AArch64::STPXpost:
2798 case AArch64::STPXpre:
2799 return 4;
2800 }
2801}
2802
2803bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2804 switch (MI.getOpcode()) {
2805 default:
2806 return false;
2807 // Scaled instructions.
2808 case AArch64::STRSui:
2809 case AArch64::STRDui:
2810 case AArch64::STRQui:
2811 case AArch64::STRXui:
2812 case AArch64::STRWui:
2813 case AArch64::LDRSui:
2814 case AArch64::LDRDui:
2815 case AArch64::LDRQui:
2816 case AArch64::LDRXui:
2817 case AArch64::LDRWui:
2818 case AArch64::LDRSWui:
2819 // Unscaled instructions.
2820 case AArch64::STURSi:
2821 case AArch64::STRSpre:
2822 case AArch64::STURDi:
2823 case AArch64::STRDpre:
2824 case AArch64::STURQi:
2825 case AArch64::STRQpre:
2826 case AArch64::STURWi:
2827 case AArch64::STRWpre:
2828 case AArch64::STURXi:
2829 case AArch64::STRXpre:
2830 case AArch64::LDURSi:
2831 case AArch64::LDRSpre:
2832 case AArch64::LDURDi:
2833 case AArch64::LDRDpre:
2834 case AArch64::LDURQi:
2835 case AArch64::LDRQpre:
2836 case AArch64::LDURWi:
2837 case AArch64::LDRWpre:
2838 case AArch64::LDURXi:
2839 case AArch64::LDRXpre:
2840 case AArch64::LDURSWi:
2841 case AArch64::LDRSWpre:
2842 // SVE instructions.
2843 case AArch64::LDR_ZXI:
2844 case AArch64::STR_ZXI:
2845 return true;
2846 }
2847}
2848
2849bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2850 switch (MI.getOpcode()) {
2851 default:
2852 assert((!MI.isCall() || !MI.isReturn()) &&
2853 "Unexpected instruction - was a new tail call opcode introduced?");
2854 return false;
2855 case AArch64::TCRETURNdi:
2856 case AArch64::TCRETURNri:
2857 case AArch64::TCRETURNrix16x17:
2858 case AArch64::TCRETURNrix17:
2859 case AArch64::TCRETURNrinotx16:
2860 case AArch64::TCRETURNriALL:
2861 case AArch64::AUTH_TCRETURN:
2862 case AArch64::AUTH_TCRETURN_BTI:
2863 return true;
2864 }
2865}
2866
2867unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2868 switch (Opc) {
2869 default:
2870 llvm_unreachable("Opcode has no flag setting equivalent!");
2871 // 32-bit cases:
2872 case AArch64::ADDWri:
2873 return AArch64::ADDSWri;
2874 case AArch64::ADDWrr:
2875 return AArch64::ADDSWrr;
2876 case AArch64::ADDWrs:
2877 return AArch64::ADDSWrs;
2878 case AArch64::ADDWrx:
2879 return AArch64::ADDSWrx;
2880 case AArch64::ANDWri:
2881 return AArch64::ANDSWri;
2882 case AArch64::ANDWrr:
2883 return AArch64::ANDSWrr;
2884 case AArch64::ANDWrs:
2885 return AArch64::ANDSWrs;
2886 case AArch64::BICWrr:
2887 return AArch64::BICSWrr;
2888 case AArch64::BICWrs:
2889 return AArch64::BICSWrs;
2890 case AArch64::SUBWri:
2891 return AArch64::SUBSWri;
2892 case AArch64::SUBWrr:
2893 return AArch64::SUBSWrr;
2894 case AArch64::SUBWrs:
2895 return AArch64::SUBSWrs;
2896 case AArch64::SUBWrx:
2897 return AArch64::SUBSWrx;
2898 // 64-bit cases:
2899 case AArch64::ADDXri:
2900 return AArch64::ADDSXri;
2901 case AArch64::ADDXrr:
2902 return AArch64::ADDSXrr;
2903 case AArch64::ADDXrs:
2904 return AArch64::ADDSXrs;
2905 case AArch64::ADDXrx:
2906 return AArch64::ADDSXrx;
2907 case AArch64::ANDXri:
2908 return AArch64::ANDSXri;
2909 case AArch64::ANDXrr:
2910 return AArch64::ANDSXrr;
2911 case AArch64::ANDXrs:
2912 return AArch64::ANDSXrs;
2913 case AArch64::BICXrr:
2914 return AArch64::BICSXrr;
2915 case AArch64::BICXrs:
2916 return AArch64::BICSXrs;
2917 case AArch64::SUBXri:
2918 return AArch64::SUBSXri;
2919 case AArch64::SUBXrr:
2920 return AArch64::SUBSXrr;
2921 case AArch64::SUBXrs:
2922 return AArch64::SUBSXrs;
2923 case AArch64::SUBXrx:
2924 return AArch64::SUBSXrx;
2925 // SVE instructions:
2926 case AArch64::AND_PPzPP:
2927 return AArch64::ANDS_PPzPP;
2928 case AArch64::BIC_PPzPP:
2929 return AArch64::BICS_PPzPP;
2930 case AArch64::EOR_PPzPP:
2931 return AArch64::EORS_PPzPP;
2932 case AArch64::NAND_PPzPP:
2933 return AArch64::NANDS_PPzPP;
2934 case AArch64::NOR_PPzPP:
2935 return AArch64::NORS_PPzPP;
2936 case AArch64::ORN_PPzPP:
2937 return AArch64::ORNS_PPzPP;
2938 case AArch64::ORR_PPzPP:
2939 return AArch64::ORRS_PPzPP;
2940 case AArch64::BRKA_PPzP:
2941 return AArch64::BRKAS_PPzP;
2942 case AArch64::BRKPA_PPzPP:
2943 return AArch64::BRKPAS_PPzPP;
2944 case AArch64::BRKB_PPzP:
2945 return AArch64::BRKBS_PPzP;
2946 case AArch64::BRKPB_PPzPP:
2947 return AArch64::BRKPBS_PPzPP;
2948 case AArch64::BRKN_PPzP:
2949 return AArch64::BRKNS_PPzP;
2950 case AArch64::RDFFR_PPz:
2951 return AArch64::RDFFRS_PPz;
2952 case AArch64::PTRUE_B:
2953 return AArch64::PTRUES_B;
2954 }
2955}
2956
2957// Is this a candidate for ld/st merging or pairing? For example, we don't
2958// touch volatiles or load/stores that have a hint to avoid pair formation.
2959bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2960
2961 bool IsPreLdSt = isPreLdSt(MI);
2962
2963 // If this is a volatile load/store, don't mess with it.
2964 if (MI.hasOrderedMemoryRef())
2965 return false;
2966
2967 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2968 // For Pre-inc LD/ST, the operand is shifted by one.
2969 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2970 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2971 "Expected a reg or frame index operand.");
2972
2973 // For Pre-indexed addressing quadword instructions, the third operand is the
2974 // immediate value.
2975 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2976
2977 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2978 return false;
2979
2980 // Can't merge/pair if the instruction modifies the base register.
2981 // e.g., ldr x0, [x0]
2982 // This case will never occur with an FI base.
2983 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2984 // STR<S,D,Q,W,X>pre, it can be merged.
2985 // For example:
2986 // ldr q0, [x11, #32]!
2987 // ldr q1, [x11, #16]
2988 // to
2989 // ldp q0, q1, [x11, #32]!
2990 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2991 Register BaseReg = MI.getOperand(1).getReg();
2992 const TargetRegisterInfo *TRI = &getRegisterInfo();
2993 if (MI.modifiesRegister(BaseReg, TRI))
2994 return false;
2995 }
2996
2997 // Pairing SVE fills/spills is only valid for little-endian targets that
2998 // implement VLS 128.
2999 switch (MI.getOpcode()) {
3000 default:
3001 break;
3002 case AArch64::LDR_ZXI:
3003 case AArch64::STR_ZXI:
3004 if (!Subtarget.isLittleEndian() ||
3005 Subtarget.getSVEVectorSizeInBits() != 128)
3006 return false;
3007 }
3008
3009 // Check if this load/store has a hint to avoid pair formation.
3010 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
3011 if (isLdStPairSuppressed(MI))
3012 return false;
3013
3014 // Do not pair any callee-save store/reload instructions in the
3015 // prologue/epilogue if the CFI information encoded the operations as separate
3016 // instructions, as that will cause the size of the actual prologue to mismatch
3017 // with the prologue size recorded in the Windows CFI.
3018 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
3019 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
3020 MI.getMF()->getFunction().needsUnwindTableEntry();
3021 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
3022 MI.getFlag(MachineInstr::FrameDestroy)))
3023 return false;
3024
3025 // On some CPUs quad load/store pairs are slower than two single load/stores.
3026 if (Subtarget.isPaired128Slow()) {
3027 switch (MI.getOpcode()) {
3028 default:
3029 break;
3030 case AArch64::LDURQi:
3031 case AArch64::STURQi:
3032 case AArch64::LDRQui:
3033 case AArch64::STRQui:
3034 return false;
3035 }
3036 }
3037
3038 return true;
3039}
3040
3041bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
3042 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
3043 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3044 const TargetRegisterInfo *TRI) const {
3045 if (!LdSt.mayLoadOrStore())
3046 return false;
3047
3048 const MachineOperand *BaseOp;
3049 TypeSize WidthN(0, false);
3050 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3051 WidthN, TRI))
3052 return false;
3053 // The maximum vscale is 16 under AArch64; return the maximal extent for the
3054 // vector.
3055 Width = LocationSize::precise(WidthN);
3056 BaseOps.push_back(BaseOp);
3057 return true;
3058}
3059
3060std::optional<ExtAddrMode>
3061AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3062 const TargetRegisterInfo *TRI) const {
3063 const MachineOperand *Base; // Filled with the base operand of MI.
3064 int64_t Offset; // Filled with the offset of MI.
3065 bool OffsetIsScalable;
3066 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3067 return std::nullopt;
3068
3069 if (!Base->isReg())
3070 return std::nullopt;
3071 ExtAddrMode AM;
3072 AM.BaseReg = Base->getReg();
3073 AM.Displacement = Offset;
3074 AM.ScaledReg = 0;
3075 AM.Scale = 0;
3076 return AM;
3077}
3078
3079bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3080 Register Reg,
3081 const MachineInstr &AddrI,
3082 ExtAddrMode &AM) const {
3083 // Filter out instructions into which we cannot fold.
3084 unsigned NumBytes;
3085 int64_t OffsetScale = 1;
3086 switch (MemI.getOpcode()) {
3087 default:
3088 return false;
3089
3090 case AArch64::LDURQi:
3091 case AArch64::STURQi:
3092 NumBytes = 16;
3093 break;
3094
3095 case AArch64::LDURDi:
3096 case AArch64::STURDi:
3097 case AArch64::LDURXi:
3098 case AArch64::STURXi:
3099 NumBytes = 8;
3100 break;
3101
3102 case AArch64::LDURWi:
3103 case AArch64::LDURSWi:
3104 case AArch64::STURWi:
3105 NumBytes = 4;
3106 break;
3107
3108 case AArch64::LDURHi:
3109 case AArch64::STURHi:
3110 case AArch64::LDURHHi:
3111 case AArch64::STURHHi:
3112 case AArch64::LDURSHXi:
3113 case AArch64::LDURSHWi:
3114 NumBytes = 2;
3115 break;
3116
3117 case AArch64::LDRBroX:
3118 case AArch64::LDRBBroX:
3119 case AArch64::LDRSBXroX:
3120 case AArch64::LDRSBWroX:
3121 case AArch64::STRBroX:
3122 case AArch64::STRBBroX:
3123 case AArch64::LDURBi:
3124 case AArch64::LDURBBi:
3125 case AArch64::LDURSBXi:
3126 case AArch64::LDURSBWi:
3127 case AArch64::STURBi:
3128 case AArch64::STURBBi:
3129 case AArch64::LDRBui:
3130 case AArch64::LDRBBui:
3131 case AArch64::LDRSBXui:
3132 case AArch64::LDRSBWui:
3133 case AArch64::STRBui:
3134 case AArch64::STRBBui:
3135 NumBytes = 1;
3136 break;
3137
3138 case AArch64::LDRQroX:
3139 case AArch64::STRQroX:
3140 case AArch64::LDRQui:
3141 case AArch64::STRQui:
3142 NumBytes = 16;
3143 OffsetScale = 16;
3144 break;
3145
3146 case AArch64::LDRDroX:
3147 case AArch64::STRDroX:
3148 case AArch64::LDRXroX:
3149 case AArch64::STRXroX:
3150 case AArch64::LDRDui:
3151 case AArch64::STRDui:
3152 case AArch64::LDRXui:
3153 case AArch64::STRXui:
3154 NumBytes = 8;
3155 OffsetScale = 8;
3156 break;
3157
3158 case AArch64::LDRWroX:
3159 case AArch64::LDRSWroX:
3160 case AArch64::STRWroX:
3161 case AArch64::LDRWui:
3162 case AArch64::LDRSWui:
3163 case AArch64::STRWui:
3164 NumBytes = 4;
3165 OffsetScale = 4;
3166 break;
3167
3168 case AArch64::LDRHroX:
3169 case AArch64::STRHroX:
3170 case AArch64::LDRHHroX:
3171 case AArch64::STRHHroX:
3172 case AArch64::LDRSHXroX:
3173 case AArch64::LDRSHWroX:
3174 case AArch64::LDRHui:
3175 case AArch64::STRHui:
3176 case AArch64::LDRHHui:
3177 case AArch64::STRHHui:
3178 case AArch64::LDRSHXui:
3179 case AArch64::LDRSHWui:
3180 NumBytes = 2;
3181 OffsetScale = 2;
3182 break;
3183 }
3184
3185 // Check the fold operand is not the loaded/stored value.
3186 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3187 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3188 return false;
3189
3190 // Handle memory instructions with a [Reg, Reg] addressing mode.
3191 if (MemI.getOperand(2).isReg()) {
3192 // Bail if the addressing mode already includes extension of the offset
3193 // register.
3194 if (MemI.getOperand(3).getImm())
3195 return false;
3196
3197 // Check if we actually have a scaled offset.
3198 if (MemI.getOperand(4).getImm() == 0)
3199 OffsetScale = 1;
3200
3201 // If the address instruction is folded into the base register, then the
3202 // addressing mode must not have a scale. Then we can swap the base and the
3203 // scaled registers.
3204 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3205 return false;
3206
3207 switch (AddrI.getOpcode()) {
3208 default:
3209 return false;
3210
3211 case AArch64::SBFMXri:
3212 // sxtw Xa, Wm
3213 // ldr Xd, [Xn, Xa, lsl #N]
3214 // ->
3215 // ldr Xd, [Xn, Wm, sxtw #N]
3216 if (AddrI.getOperand(2).getImm() != 0 ||
3217 AddrI.getOperand(3).getImm() != 31)
3218 return false;
3219
3220 AM.BaseReg = MemI.getOperand(1).getReg();
3221 if (AM.BaseReg == Reg)
3222 AM.BaseReg = MemI.getOperand(2).getReg();
3223 AM.ScaledReg = AddrI.getOperand(1).getReg();
3224 AM.Scale = OffsetScale;
3225 AM.Displacement = 0;
3226 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3227 return true;
3228
3229 case TargetOpcode::SUBREG_TO_REG: {
3230 // mov Wa, Wm
3231 // ldr Xd, [Xn, Xa, lsl #N]
3232 // ->
3233 // ldr Xd, [Xn, Wm, uxtw #N]
3234
3235 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3236 if (AddrI.getOperand(1).getImm() != 0 ||
3237 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3238 return false;
3239
3240 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3241 Register OffsetReg = AddrI.getOperand(2).getReg();
3242 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3243 return false;
3244
3245 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3246 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3247 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3248 DefMI.getOperand(3).getImm() != 0)
3249 return false;
3250
3251 AM.BaseReg = MemI.getOperand(1).getReg();
3252 if (AM.BaseReg == Reg)
3253 AM.BaseReg = MemI.getOperand(2).getReg();
3254 AM.ScaledReg = DefMI.getOperand(2).getReg();
3255 AM.Scale = OffsetScale;
3256 AM.Displacement = 0;
3257 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3258 return true;
3259 }
3260 }
3261 }
3262
3263 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3264
3265 // Check we are not breaking a potential conversion to an LDP.
3266 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3267 int64_t NewOffset) -> bool {
3268 int64_t MinOffset, MaxOffset;
3269 switch (NumBytes) {
3270 default:
3271 return true;
3272 case 4:
3273 MinOffset = -256;
3274 MaxOffset = 252;
3275 break;
3276 case 8:
3277 MinOffset = -512;
3278 MaxOffset = 504;
3279 break;
3280 case 16:
3281 MinOffset = -1024;
3282 MaxOffset = 1008;
3283 break;
3284 }
3285 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3286 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3287 };
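 // Illustrative check: for an 8-byte access the LDP-friendly range is
 // [-512, 504]; an old offset of 0 with a new offset of 1024 would move the
 // access out of that range, so the lambda returns false and the fold is
 // rejected.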
3288 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3289 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3290 int64_t NewOffset = OldOffset + Disp;
3291 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3292 return false;
3293 // If the old offset would fit into an LDP, but the new offset wouldn't,
3294 // bail out.
3295 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3296 return false;
3297 AM.BaseReg = AddrI.getOperand(1).getReg();
3298 AM.ScaledReg = 0;
3299 AM.Scale = 0;
3300 AM.Displacement = NewOffset;
3301 AM.Form = ExtAddrMode::Formula::Basic;
3302 return true;
3303 };
3304
3305 auto canFoldAddRegIntoAddrMode =
3306 [&](int64_t Scale,
3307 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3308 if (MemI.getOperand(2).getImm() != 0)
3309 return false;
3310 if ((unsigned)Scale != Scale)
3311 return false;
3312 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3313 return false;
3314 AM.BaseReg = AddrI.getOperand(1).getReg();
3315 AM.ScaledReg = AddrI.getOperand(2).getReg();
3316 AM.Scale = Scale;
3317 AM.Displacement = 0;
3318 AM.Form = Form;
3319 return true;
3320 };
3321
3322 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3323 unsigned Opcode = MemI.getOpcode();
3324 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3325 Subtarget.isSTRQroSlow();
3326 };
3327
3328 int64_t Disp = 0;
3329 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3330 switch (AddrI.getOpcode()) {
3331 default:
3332 return false;
3333
3334 case AArch64::ADDXri:
3335 // add Xa, Xn, #N
3336 // ldr Xd, [Xa, #M]
3337 // ->
3338 // ldr Xd, [Xn, #N'+M]
3339 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3340 return canFoldAddSubImmIntoAddrMode(Disp);
3341
3342 case AArch64::SUBXri:
3343 // sub Xa, Xn, #N
3344 // ldr Xd, [Xa, #M]
3345 // ->
3346 // ldr Xd, [Xn, #N'+M]
3347 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3348 return canFoldAddSubImmIntoAddrMode(-Disp);
3349
3350 case AArch64::ADDXrs: {
3351 // add Xa, Xn, Xm, lsl #N
3352 // ldr Xd, [Xa]
3353 // ->
3354 // ldr Xd, [Xn, Xm, lsl #N]
3355
3356 // Don't fold the add if the result would be slower, unless optimising for
3357 // size.
3358 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3359 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
3360 return false;
3361 Shift = AArch64_AM::getShiftValue(Shift);
3362 if (!OptSize) {
3363 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3364 return false;
3365 if (avoidSlowSTRQ(MemI))
3366 return false;
3367 }
3368 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3369 }
3370
3371 case AArch64::ADDXrr:
3372 // add Xa, Xn, Xm
3373 // ldr Xd, [Xa]
3374 // ->
3375 // ldr Xd, [Xn, Xm, lsl #0]
3376
3377 // Don't fold the add if the result would be slower, unless optimising for
3378 // size.
3379 if (!OptSize && avoidSlowSTRQ(MemI))
3380 return false;
3381 return canFoldAddRegIntoAddrMode(1);
3382
3383 case AArch64::ADDXrx:
3384 // add Xa, Xn, Wm, {s,u}xtw #N
3385 // ldr Xd, [Xa]
3386 // ->
3387 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3388
3389 // Don't fold the add if the result would be slower, unless optimising for
3390 // size.
3391 if (!OptSize && avoidSlowSTRQ(MemI))
3392 return false;
3393
3394 // Can fold only sign-/zero-extend of a word.
3395 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3396 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3397 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3398 return false;
3399
3400 return canFoldAddRegIntoAddrMode(
3401 1ULL << AArch64_AM::getArithShiftValue(Imm),
3402 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3403 : ExtAddrMode::Formula::ZExtScaledReg);
3404 }
3405}
3406
3407// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3408// return the opcode of an instruction performing the same operation, but using
3409// the [Reg, Reg] addressing mode.
3410static unsigned regOffsetOpcode(unsigned Opcode) {
3411 switch (Opcode) {
3412 default:
3413 llvm_unreachable("Address folding not implemented for instruction");
3414
3415 case AArch64::LDURQi:
3416 case AArch64::LDRQui:
3417 return AArch64::LDRQroX;
3418 case AArch64::STURQi:
3419 case AArch64::STRQui:
3420 return AArch64::STRQroX;
3421 case AArch64::LDURDi:
3422 case AArch64::LDRDui:
3423 return AArch64::LDRDroX;
3424 case AArch64::STURDi:
3425 case AArch64::STRDui:
3426 return AArch64::STRDroX;
3427 case AArch64::LDURXi:
3428 case AArch64::LDRXui:
3429 return AArch64::LDRXroX;
3430 case AArch64::STURXi:
3431 case AArch64::STRXui:
3432 return AArch64::STRXroX;
3433 case AArch64::LDURWi:
3434 case AArch64::LDRWui:
3435 return AArch64::LDRWroX;
3436 case AArch64::LDURSWi:
3437 case AArch64::LDRSWui:
3438 return AArch64::LDRSWroX;
3439 case AArch64::STURWi:
3440 case AArch64::STRWui:
3441 return AArch64::STRWroX;
3442 case AArch64::LDURHi:
3443 case AArch64::LDRHui:
3444 return AArch64::LDRHroX;
3445 case AArch64::STURHi:
3446 case AArch64::STRHui:
3447 return AArch64::STRHroX;
3448 case AArch64::LDURHHi:
3449 case AArch64::LDRHHui:
3450 return AArch64::LDRHHroX;
3451 case AArch64::STURHHi:
3452 case AArch64::STRHHui:
3453 return AArch64::STRHHroX;
3454 case AArch64::LDURSHXi:
3455 case AArch64::LDRSHXui:
3456 return AArch64::LDRSHXroX;
3457 case AArch64::LDURSHWi:
3458 case AArch64::LDRSHWui:
3459 return AArch64::LDRSHWroX;
3460 case AArch64::LDURBi:
3461 case AArch64::LDRBui:
3462 return AArch64::LDRBroX;
3463 case AArch64::LDURBBi:
3464 case AArch64::LDRBBui:
3465 return AArch64::LDRBBroX;
3466 case AArch64::LDURSBXi:
3467 case AArch64::LDRSBXui:
3468 return AArch64::LDRSBXroX;
3469 case AArch64::LDURSBWi:
3470 case AArch64::LDRSBWui:
3471 return AArch64::LDRSBWroX;
3472 case AArch64::STURBi:
3473 case AArch64::STRBui:
3474 return AArch64::STRBroX;
3475 case AArch64::STURBBi:
3476 case AArch64::STRBBui:
3477 return AArch64::STRBBroX;
3478 }
3479}
3480
3481// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3482// the opcode of an instruction performing the same operation, but using the
3483// [Reg, #Imm] addressing mode with scaled offset.
3484unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3485 switch (Opcode) {
3486 default:
3487 llvm_unreachable("Address folding not implemented for instruction");
3488
3489 case AArch64::LDURQi:
3490 Scale = 16;
3491 return AArch64::LDRQui;
3492 case AArch64::STURQi:
3493 Scale = 16;
3494 return AArch64::STRQui;
3495 case AArch64::LDURDi:
3496 Scale = 8;
3497 return AArch64::LDRDui;
3498 case AArch64::STURDi:
3499 Scale = 8;
3500 return AArch64::STRDui;
3501 case AArch64::LDURXi:
3502 Scale = 8;
3503 return AArch64::LDRXui;
3504 case AArch64::STURXi:
3505 Scale = 8;
3506 return AArch64::STRXui;
3507 case AArch64::LDURWi:
3508 Scale = 4;
3509 return AArch64::LDRWui;
3510 case AArch64::LDURSWi:
3511 Scale = 4;
3512 return AArch64::LDRSWui;
3513 case AArch64::STURWi:
3514 Scale = 4;
3515 return AArch64::STRWui;
3516 case AArch64::LDURHi:
3517 Scale = 2;
3518 return AArch64::LDRHui;
3519 case AArch64::STURHi:
3520 Scale = 2;
3521 return AArch64::STRHui;
3522 case AArch64::LDURHHi:
3523 Scale = 2;
3524 return AArch64::LDRHHui;
3525 case AArch64::STURHHi:
3526 Scale = 2;
3527 return AArch64::STRHHui;
3528 case AArch64::LDURSHXi:
3529 Scale = 2;
3530 return AArch64::LDRSHXui;
3531 case AArch64::LDURSHWi:
3532 Scale = 2;
3533 return AArch64::LDRSHWui;
3534 case AArch64::LDURBi:
3535 Scale = 1;
3536 return AArch64::LDRBui;
3537 case AArch64::LDURBBi:
3538 Scale = 1;
3539 return AArch64::LDRBBui;
3540 case AArch64::LDURSBXi:
3541 Scale = 1;
3542 return AArch64::LDRSBXui;
3543 case AArch64::LDURSBWi:
3544 Scale = 1;
3545 return AArch64::LDRSBWui;
3546 case AArch64::STURBi:
3547 Scale = 1;
3548 return AArch64::STRBui;
3549 case AArch64::STURBBi:
3550 Scale = 1;
3551 return AArch64::STRBBui;
3552 case AArch64::LDRQui:
3553 case AArch64::STRQui:
3554 Scale = 16;
3555 return Opcode;
3556 case AArch64::LDRDui:
3557 case AArch64::STRDui:
3558 case AArch64::LDRXui:
3559 case AArch64::STRXui:
3560 Scale = 8;
3561 return Opcode;
3562 case AArch64::LDRWui:
3563 case AArch64::LDRSWui:
3564 case AArch64::STRWui:
3565 Scale = 4;
3566 return Opcode;
3567 case AArch64::LDRHui:
3568 case AArch64::STRHui:
3569 case AArch64::LDRHHui:
3570 case AArch64::STRHHui:
3571 case AArch64::LDRSHXui:
3572 case AArch64::LDRSHWui:
3573 Scale = 2;
3574 return Opcode;
3575 case AArch64::LDRBui:
3576 case AArch64::LDRBBui:
3577 case AArch64::LDRSBXui:
3578 case AArch64::LDRSBWui:
3579 case AArch64::STRBui:
3580 case AArch64::STRBBui:
3581 Scale = 1;
3582 return Opcode;
3583 }
3584}
3585
3586// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3587// the opcode of an instruction performing the same operation, but using the
3588// [Reg, #Imm] addressing mode with unscaled offset.
3589unsigned unscaledOffsetOpcode(unsigned Opcode) {
3590 switch (Opcode) {
3591 default:
3592 llvm_unreachable("Address folding not implemented for instruction");
3593
3594 case AArch64::LDURQi:
3595 case AArch64::STURQi:
3596 case AArch64::LDURDi:
3597 case AArch64::STURDi:
3598 case AArch64::LDURXi:
3599 case AArch64::STURXi:
3600 case AArch64::LDURWi:
3601 case AArch64::LDURSWi:
3602 case AArch64::STURWi:
3603 case AArch64::LDURHi:
3604 case AArch64::STURHi:
3605 case AArch64::LDURHHi:
3606 case AArch64::STURHHi:
3607 case AArch64::LDURSHXi:
3608 case AArch64::LDURSHWi:
3609 case AArch64::LDURBi:
3610 case AArch64::STURBi:
3611 case AArch64::LDURBBi:
3612 case AArch64::STURBBi:
3613 case AArch64::LDURSBWi:
3614 case AArch64::LDURSBXi:
3615 return Opcode;
3616 case AArch64::LDRQui:
3617 return AArch64::LDURQi;
3618 case AArch64::STRQui:
3619 return AArch64::STURQi;
3620 case AArch64::LDRDui:
3621 return AArch64::LDURDi;
3622 case AArch64::STRDui:
3623 return AArch64::STURDi;
3624 case AArch64::LDRXui:
3625 return AArch64::LDURXi;
3626 case AArch64::STRXui:
3627 return AArch64::STURXi;
3628 case AArch64::LDRWui:
3629 return AArch64::LDURWi;
3630 case AArch64::LDRSWui:
3631 return AArch64::LDURSWi;
3632 case AArch64::STRWui:
3633 return AArch64::STURWi;
3634 case AArch64::LDRHui:
3635 return AArch64::LDURHi;
3636 case AArch64::STRHui:
3637 return AArch64::STURHi;
3638 case AArch64::LDRHHui:
3639 return AArch64::LDURHHi;
3640 case AArch64::STRHHui:
3641 return AArch64::STURHHi;
3642 case AArch64::LDRSHXui:
3643 return AArch64::LDURSHXi;
3644 case AArch64::LDRSHWui:
3645 return AArch64::LDURSHWi;
3646 case AArch64::LDRBBui:
3647 return AArch64::LDURBBi;
3648 case AArch64::LDRBui:
3649 return AArch64::LDURBi;
3650 case AArch64::STRBBui:
3651 return AArch64::STURBBi;
3652 case AArch64::STRBui:
3653 return AArch64::STURBi;
3654 case AArch64::LDRSBWui:
3655 return AArch64::LDURSBWi;
3656 case AArch64::LDRSBXui:
3657 return AArch64::LDURSBXi;
3658 }
3659}
3660
3661// Given the opcode of a memory load/store instruction, return the opcode of an
3662// instruction performing the same operation, but using
3663// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3664// offset register.
3665static unsigned offsetExtendOpcode(unsigned Opcode) {
3666 switch (Opcode) {
3667 default:
3668 llvm_unreachable("Address folding not implemented for instruction");
3669
3670 case AArch64::LDRQroX:
3671 case AArch64::LDURQi:
3672 case AArch64::LDRQui:
3673 return AArch64::LDRQroW;
3674 case AArch64::STRQroX:
3675 case AArch64::STURQi:
3676 case AArch64::STRQui:
3677 return AArch64::STRQroW;
3678 case AArch64::LDRDroX:
3679 case AArch64::LDURDi:
3680 case AArch64::LDRDui:
3681 return AArch64::LDRDroW;
3682 case AArch64::STRDroX:
3683 case AArch64::STURDi:
3684 case AArch64::STRDui:
3685 return AArch64::STRDroW;
3686 case AArch64::LDRXroX:
3687 case AArch64::LDURXi:
3688 case AArch64::LDRXui:
3689 return AArch64::LDRXroW;
3690 case AArch64::STRXroX:
3691 case AArch64::STURXi:
3692 case AArch64::STRXui:
3693 return AArch64::STRXroW;
3694 case AArch64::LDRWroX:
3695 case AArch64::LDURWi:
3696 case AArch64::LDRWui:
3697 return AArch64::LDRWroW;
3698 case AArch64::LDRSWroX:
3699 case AArch64::LDURSWi:
3700 case AArch64::LDRSWui:
3701 return AArch64::LDRSWroW;
3702 case AArch64::STRWroX:
3703 case AArch64::STURWi:
3704 case AArch64::STRWui:
3705 return AArch64::STRWroW;
3706 case AArch64::LDRHroX:
3707 case AArch64::LDURHi:
3708 case AArch64::LDRHui:
3709 return AArch64::LDRHroW;
3710 case AArch64::STRHroX:
3711 case AArch64::STURHi:
3712 case AArch64::STRHui:
3713 return AArch64::STRHroW;
3714 case AArch64::LDRHHroX:
3715 case AArch64::LDURHHi:
3716 case AArch64::LDRHHui:
3717 return AArch64::LDRHHroW;
3718 case AArch64::STRHHroX:
3719 case AArch64::STURHHi:
3720 case AArch64::STRHHui:
3721 return AArch64::STRHHroW;
3722 case AArch64::LDRSHXroX:
3723 case AArch64::LDURSHXi:
3724 case AArch64::LDRSHXui:
3725 return AArch64::LDRSHXroW;
3726 case AArch64::LDRSHWroX:
3727 case AArch64::LDURSHWi:
3728 case AArch64::LDRSHWui:
3729 return AArch64::LDRSHWroW;
3730 case AArch64::LDRBroX:
3731 case AArch64::LDURBi:
3732 case AArch64::LDRBui:
3733 return AArch64::LDRBroW;
3734 case AArch64::LDRBBroX:
3735 case AArch64::LDURBBi:
3736 case AArch64::LDRBBui:
3737 return AArch64::LDRBBroW;
3738 case AArch64::LDRSBXroX:
3739 case AArch64::LDURSBXi:
3740 case AArch64::LDRSBXui:
3741 return AArch64::LDRSBXroW;
3742 case AArch64::LDRSBWroX:
3743 case AArch64::LDURSBWi:
3744 case AArch64::LDRSBWui:
3745 return AArch64::LDRSBWroW;
3746 case AArch64::STRBroX:
3747 case AArch64::STURBi:
3748 case AArch64::STRBui:
3749 return AArch64::STRBroW;
3750 case AArch64::STRBBroX:
3751 case AArch64::STURBBi:
3752 case AArch64::STRBBui:
3753 return AArch64::STRBBroW;
3754 }
3755}
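// For example, LDRXroW Xt, [Xn, Wm, sxtw #3] computes the address as
// Xn + (sign-extend(Wm) << 3); the uxtw variant zero-extends Wm instead. The
// caller below picks between the two based on the addressing mode it folds.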
3756
3757 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3758 const ExtAddrMode &AM) const {
3759
3760 const DebugLoc &DL = MemI.getDebugLoc();
3761 MachineBasicBlock &MBB = *MemI.getParent();
3762 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3763
3764 if (AM.Form == ExtAddrMode::Formula::Basic) {
3765 if (AM.ScaledReg) {
3766 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3767 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3768 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3769 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3770 .addReg(MemI.getOperand(0).getReg(),
3771 MemI.mayLoad() ? RegState::Define : 0)
3772 .addReg(AM.BaseReg)
3773 .addReg(AM.ScaledReg)
3774 .addImm(0)
3775 .addImm(AM.Scale > 1)
3776 .setMemRefs(MemI.memoperands())
3777 .setMIFlags(MemI.getFlags());
3778 return B.getInstr();
3779 }
3780
3781 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3782 "Addressing mode not supported for folding");
3783
3784 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3785 unsigned Scale = 1;
3786 unsigned Opcode = MemI.getOpcode();
3787 if (isInt<9>(AM.Displacement))
3788 Opcode = unscaledOffsetOpcode(Opcode);
3789 else
3790 Opcode = scaledOffsetOpcode(Opcode, Scale);
3791
3792 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3793 .addReg(MemI.getOperand(0).getReg(),
3794 MemI.mayLoad() ? RegState::Define : 0)
3795 .addReg(AM.BaseReg)
3796 .addImm(AM.Displacement / Scale)
3797 .setMemRefs(MemI.memoperands())
3798 .setMIFlags(MemI.getFlags());
3799 return B.getInstr();
3800 }
3801
3802 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3803 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3804 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3805 assert(AM.ScaledReg && !AM.Displacement &&
3806 "Address offset can be a register or an immediate, but not both");
3807 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3808 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3809 // Make sure the offset register is in the correct register class.
3810 Register OffsetReg = AM.ScaledReg;
3811 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3812 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3813 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3814 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3815 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3816 }
3817 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3818 .addReg(MemI.getOperand(0).getReg(),
3819 MemI.mayLoad() ? RegState::Define : 0)
3820 .addReg(AM.BaseReg)
3821 .addReg(OffsetReg)
3822 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3823 .addImm(AM.Scale != 1)
3824 .setMemRefs(MemI.memoperands())
3825 .setMIFlags(MemI.getFlags());
3826
3827 return B.getInstr();
3828 }
3829
3831 "Function must not be called with an addressing mode it can't handle");
3832}
3833
3834/// Return true if the opcode is a post-index ld/st instruction, which really
3835 /// loads or stores from base+0.
3836static bool isPostIndexLdStOpcode(unsigned Opcode) {
3837 switch (Opcode) {
3838 default:
3839 return false;
3840 case AArch64::LD1Fourv16b_POST:
3841 case AArch64::LD1Fourv1d_POST:
3842 case AArch64::LD1Fourv2d_POST:
3843 case AArch64::LD1Fourv2s_POST:
3844 case AArch64::LD1Fourv4h_POST:
3845 case AArch64::LD1Fourv4s_POST:
3846 case AArch64::LD1Fourv8b_POST:
3847 case AArch64::LD1Fourv8h_POST:
3848 case AArch64::LD1Onev16b_POST:
3849 case AArch64::LD1Onev1d_POST:
3850 case AArch64::LD1Onev2d_POST:
3851 case AArch64::LD1Onev2s_POST:
3852 case AArch64::LD1Onev4h_POST:
3853 case AArch64::LD1Onev4s_POST:
3854 case AArch64::LD1Onev8b_POST:
3855 case AArch64::LD1Onev8h_POST:
3856 case AArch64::LD1Rv16b_POST:
3857 case AArch64::LD1Rv1d_POST:
3858 case AArch64::LD1Rv2d_POST:
3859 case AArch64::LD1Rv2s_POST:
3860 case AArch64::LD1Rv4h_POST:
3861 case AArch64::LD1Rv4s_POST:
3862 case AArch64::LD1Rv8b_POST:
3863 case AArch64::LD1Rv8h_POST:
3864 case AArch64::LD1Threev16b_POST:
3865 case AArch64::LD1Threev1d_POST:
3866 case AArch64::LD1Threev2d_POST:
3867 case AArch64::LD1Threev2s_POST:
3868 case AArch64::LD1Threev4h_POST:
3869 case AArch64::LD1Threev4s_POST:
3870 case AArch64::LD1Threev8b_POST:
3871 case AArch64::LD1Threev8h_POST:
3872 case AArch64::LD1Twov16b_POST:
3873 case AArch64::LD1Twov1d_POST:
3874 case AArch64::LD1Twov2d_POST:
3875 case AArch64::LD1Twov2s_POST:
3876 case AArch64::LD1Twov4h_POST:
3877 case AArch64::LD1Twov4s_POST:
3878 case AArch64::LD1Twov8b_POST:
3879 case AArch64::LD1Twov8h_POST:
3880 case AArch64::LD1i16_POST:
3881 case AArch64::LD1i32_POST:
3882 case AArch64::LD1i64_POST:
3883 case AArch64::LD1i8_POST:
3884 case AArch64::LD2Rv16b_POST:
3885 case AArch64::LD2Rv1d_POST:
3886 case AArch64::LD2Rv2d_POST:
3887 case AArch64::LD2Rv2s_POST:
3888 case AArch64::LD2Rv4h_POST:
3889 case AArch64::LD2Rv4s_POST:
3890 case AArch64::LD2Rv8b_POST:
3891 case AArch64::LD2Rv8h_POST:
3892 case AArch64::LD2Twov16b_POST:
3893 case AArch64::LD2Twov2d_POST:
3894 case AArch64::LD2Twov2s_POST:
3895 case AArch64::LD2Twov4h_POST:
3896 case AArch64::LD2Twov4s_POST:
3897 case AArch64::LD2Twov8b_POST:
3898 case AArch64::LD2Twov8h_POST:
3899 case AArch64::LD2i16_POST:
3900 case AArch64::LD2i32_POST:
3901 case AArch64::LD2i64_POST:
3902 case AArch64::LD2i8_POST:
3903 case AArch64::LD3Rv16b_POST:
3904 case AArch64::LD3Rv1d_POST:
3905 case AArch64::LD3Rv2d_POST:
3906 case AArch64::LD3Rv2s_POST:
3907 case AArch64::LD3Rv4h_POST:
3908 case AArch64::LD3Rv4s_POST:
3909 case AArch64::LD3Rv8b_POST:
3910 case AArch64::LD3Rv8h_POST:
3911 case AArch64::LD3Threev16b_POST:
3912 case AArch64::LD3Threev2d_POST:
3913 case AArch64::LD3Threev2s_POST:
3914 case AArch64::LD3Threev4h_POST:
3915 case AArch64::LD3Threev4s_POST:
3916 case AArch64::LD3Threev8b_POST:
3917 case AArch64::LD3Threev8h_POST:
3918 case AArch64::LD3i16_POST:
3919 case AArch64::LD3i32_POST:
3920 case AArch64::LD3i64_POST:
3921 case AArch64::LD3i8_POST:
3922 case AArch64::LD4Fourv16b_POST:
3923 case AArch64::LD4Fourv2d_POST:
3924 case AArch64::LD4Fourv2s_POST:
3925 case AArch64::LD4Fourv4h_POST:
3926 case AArch64::LD4Fourv4s_POST:
3927 case AArch64::LD4Fourv8b_POST:
3928 case AArch64::LD4Fourv8h_POST:
3929 case AArch64::LD4Rv16b_POST:
3930 case AArch64::LD4Rv1d_POST:
3931 case AArch64::LD4Rv2d_POST:
3932 case AArch64::LD4Rv2s_POST:
3933 case AArch64::LD4Rv4h_POST:
3934 case AArch64::LD4Rv4s_POST:
3935 case AArch64::LD4Rv8b_POST:
3936 case AArch64::LD4Rv8h_POST:
3937 case AArch64::LD4i16_POST:
3938 case AArch64::LD4i32_POST:
3939 case AArch64::LD4i64_POST:
3940 case AArch64::LD4i8_POST:
3941 case AArch64::LDAPRWpost:
3942 case AArch64::LDAPRXpost:
3943 case AArch64::LDIAPPWpost:
3944 case AArch64::LDIAPPXpost:
3945 case AArch64::LDPDpost:
3946 case AArch64::LDPQpost:
3947 case AArch64::LDPSWpost:
3948 case AArch64::LDPSpost:
3949 case AArch64::LDPWpost:
3950 case AArch64::LDPXpost:
3951 case AArch64::LDRBBpost:
3952 case AArch64::LDRBpost:
3953 case AArch64::LDRDpost:
3954 case AArch64::LDRHHpost:
3955 case AArch64::LDRHpost:
3956 case AArch64::LDRQpost:
3957 case AArch64::LDRSBWpost:
3958 case AArch64::LDRSBXpost:
3959 case AArch64::LDRSHWpost:
3960 case AArch64::LDRSHXpost:
3961 case AArch64::LDRSWpost:
3962 case AArch64::LDRSpost:
3963 case AArch64::LDRWpost:
3964 case AArch64::LDRXpost:
3965 case AArch64::ST1Fourv16b_POST:
3966 case AArch64::ST1Fourv1d_POST:
3967 case AArch64::ST1Fourv2d_POST:
3968 case AArch64::ST1Fourv2s_POST:
3969 case AArch64::ST1Fourv4h_POST:
3970 case AArch64::ST1Fourv4s_POST:
3971 case AArch64::ST1Fourv8b_POST:
3972 case AArch64::ST1Fourv8h_POST:
3973 case AArch64::ST1Onev16b_POST:
3974 case AArch64::ST1Onev1d_POST:
3975 case AArch64::ST1Onev2d_POST:
3976 case AArch64::ST1Onev2s_POST:
3977 case AArch64::ST1Onev4h_POST:
3978 case AArch64::ST1Onev4s_POST:
3979 case AArch64::ST1Onev8b_POST:
3980 case AArch64::ST1Onev8h_POST:
3981 case AArch64::ST1Threev16b_POST:
3982 case AArch64::ST1Threev1d_POST:
3983 case AArch64::ST1Threev2d_POST:
3984 case AArch64::ST1Threev2s_POST:
3985 case AArch64::ST1Threev4h_POST:
3986 case AArch64::ST1Threev4s_POST:
3987 case AArch64::ST1Threev8b_POST:
3988 case AArch64::ST1Threev8h_POST:
3989 case AArch64::ST1Twov16b_POST:
3990 case AArch64::ST1Twov1d_POST:
3991 case AArch64::ST1Twov2d_POST:
3992 case AArch64::ST1Twov2s_POST:
3993 case AArch64::ST1Twov4h_POST:
3994 case AArch64::ST1Twov4s_POST:
3995 case AArch64::ST1Twov8b_POST:
3996 case AArch64::ST1Twov8h_POST:
3997 case AArch64::ST1i16_POST:
3998 case AArch64::ST1i32_POST:
3999 case AArch64::ST1i64_POST:
4000 case AArch64::ST1i8_POST:
4001 case AArch64::ST2GPostIndex:
4002 case AArch64::ST2Twov16b_POST:
4003 case AArch64::ST2Twov2d_POST:
4004 case AArch64::ST2Twov2s_POST:
4005 case AArch64::ST2Twov4h_POST:
4006 case AArch64::ST2Twov4s_POST:
4007 case AArch64::ST2Twov8b_POST:
4008 case AArch64::ST2Twov8h_POST:
4009 case AArch64::ST2i16_POST:
4010 case AArch64::ST2i32_POST:
4011 case AArch64::ST2i64_POST:
4012 case AArch64::ST2i8_POST:
4013 case AArch64::ST3Threev16b_POST:
4014 case AArch64::ST3Threev2d_POST:
4015 case AArch64::ST3Threev2s_POST:
4016 case AArch64::ST3Threev4h_POST:
4017 case AArch64::ST3Threev4s_POST:
4018 case AArch64::ST3Threev8b_POST:
4019 case AArch64::ST3Threev8h_POST:
4020 case AArch64::ST3i16_POST:
4021 case AArch64::ST3i32_POST:
4022 case AArch64::ST3i64_POST:
4023 case AArch64::ST3i8_POST:
4024 case AArch64::ST4Fourv16b_POST:
4025 case AArch64::ST4Fourv2d_POST:
4026 case AArch64::ST4Fourv2s_POST:
4027 case AArch64::ST4Fourv4h_POST:
4028 case AArch64::ST4Fourv4s_POST:
4029 case AArch64::ST4Fourv8b_POST:
4030 case AArch64::ST4Fourv8h_POST:
4031 case AArch64::ST4i16_POST:
4032 case AArch64::ST4i32_POST:
4033 case AArch64::ST4i64_POST:
4034 case AArch64::ST4i8_POST:
4035 case AArch64::STGPostIndex:
4036 case AArch64::STGPpost:
4037 case AArch64::STPDpost:
4038 case AArch64::STPQpost:
4039 case AArch64::STPSpost:
4040 case AArch64::STPWpost:
4041 case AArch64::STPXpost:
4042 case AArch64::STRBBpost:
4043 case AArch64::STRBpost:
4044 case AArch64::STRDpost:
4045 case AArch64::STRHHpost:
4046 case AArch64::STRHpost:
4047 case AArch64::STRQpost:
4048 case AArch64::STRSpost:
4049 case AArch64::STRWpost:
4050 case AArch64::STRXpost:
4051 case AArch64::STZ2GPostIndex:
4052 case AArch64::STZGPostIndex:
4053 return true;
4054 }
4055}
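// Post-index forms such as LDRXpost Xt, [Xn], #imm access memory at Xn itself
// and only afterwards add #imm to Xn, which is why the offset computation
// below reports an offset of 0 for these opcodes.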
4056
4057 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4058 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4059 bool &OffsetIsScalable, TypeSize &Width,
4060 const TargetRegisterInfo *TRI) const {
4061 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4062 // Handle only loads/stores with base register followed by immediate offset.
4063 if (LdSt.getNumExplicitOperands() == 3) {
4064 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4065 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4066 !LdSt.getOperand(2).isImm())
4067 return false;
4068 } else if (LdSt.getNumExplicitOperands() == 4) {
4069 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4070 if (!LdSt.getOperand(1).isReg() ||
4071 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4072 !LdSt.getOperand(3).isImm())
4073 return false;
4074 } else
4075 return false;
4076
4077 // Get the scaling factor for the instruction and set the width for the
4078 // instruction.
4079 TypeSize Scale(0U, false);
4080 int64_t Dummy1, Dummy2;
4081
4082 // If this returns false, then it's an instruction we don't want to handle.
4083 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4084 return false;
4085
4086 // Compute the offset. Offset is calculated as the immediate operand
4087 // multiplied by the scaling factor. Unscaled instructions have scaling factor
4088 // set to 1. Post-index instructions are a special case and have an offset of 0.
4089 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4090 BaseOp = &LdSt.getOperand(2);
4091 Offset = 0;
4092 } else if (LdSt.getNumExplicitOperands() == 3) {
4093 BaseOp = &LdSt.getOperand(1);
4094 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4095 } else {
4096 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4097 BaseOp = &LdSt.getOperand(2);
4098 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4099 }
4100 OffsetIsScalable = Scale.isScalable();
4101
4102 return BaseOp->isReg() || BaseOp->isFI();
4103}
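// For example, for `ldr x1, [x0, #8]` (LDRXui) the explicit operands are
// (x1, x0, 1); BaseOp is x0 and the reported byte offset is 1 * Scale(8) = 8.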
4104
4105 MachineOperand &
4106 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
4107 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4108 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4109 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4110 return OfsOp;
4111}
4112
4113bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4114 TypeSize &Width, int64_t &MinOffset,
4115 int64_t &MaxOffset) {
4116 switch (Opcode) {
4117 // Not a memory operation or something we want to handle.
4118 default:
4119 Scale = TypeSize::getFixed(0);
4120 Width = TypeSize::getFixed(0);
4121 MinOffset = MaxOffset = 0;
4122 return false;
4123 // LDR / STR
4124 case AArch64::LDRQui:
4125 case AArch64::STRQui:
4126 Scale = TypeSize::getFixed(16);
4127 Width = TypeSize::getFixed(16);
4128 MinOffset = 0;
4129 MaxOffset = 4095;
4130 break;
4131 case AArch64::LDRXui:
4132 case AArch64::LDRDui:
4133 case AArch64::STRXui:
4134 case AArch64::STRDui:
4135 case AArch64::PRFMui:
4136 Scale = TypeSize::getFixed(8);
4137 Width = TypeSize::getFixed(8);
4138 MinOffset = 0;
4139 MaxOffset = 4095;
4140 break;
4141 case AArch64::LDRWui:
4142 case AArch64::LDRSui:
4143 case AArch64::LDRSWui:
4144 case AArch64::STRWui:
4145 case AArch64::STRSui:
4146 Scale = TypeSize::getFixed(4);
4147 Width = TypeSize::getFixed(4);
4148 MinOffset = 0;
4149 MaxOffset = 4095;
4150 break;
4151 case AArch64::LDRHui:
4152 case AArch64::LDRHHui:
4153 case AArch64::LDRSHWui:
4154 case AArch64::LDRSHXui:
4155 case AArch64::STRHui:
4156 case AArch64::STRHHui:
4157 Scale = TypeSize::getFixed(2);
4158 Width = TypeSize::getFixed(2);
4159 MinOffset = 0;
4160 MaxOffset = 4095;
4161 break;
4162 case AArch64::LDRBui:
4163 case AArch64::LDRBBui:
4164 case AArch64::LDRSBWui:
4165 case AArch64::LDRSBXui:
4166 case AArch64::STRBui:
4167 case AArch64::STRBBui:
4168 Scale = TypeSize::getFixed(1);
4169 Width = TypeSize::getFixed(1);
4170 MinOffset = 0;
4171 MaxOffset = 4095;
4172 break;
4173 // post/pre inc
4174 case AArch64::STRQpre:
4175 case AArch64::LDRQpost:
4176 Scale = TypeSize::getFixed(1);
4177 Width = TypeSize::getFixed(16);
4178 MinOffset = -256;
4179 MaxOffset = 255;
4180 break;
4181 case AArch64::LDRDpost:
4182 case AArch64::LDRDpre:
4183 case AArch64::LDRXpost:
4184 case AArch64::LDRXpre:
4185 case AArch64::STRDpost:
4186 case AArch64::STRDpre:
4187 case AArch64::STRXpost:
4188 case AArch64::STRXpre:
4189 Scale = TypeSize::getFixed(1);
4190 Width = TypeSize::getFixed(8);
4191 MinOffset = -256;
4192 MaxOffset = 255;
4193 break;
4194 case AArch64::STRWpost:
4195 case AArch64::STRWpre:
4196 case AArch64::LDRWpost:
4197 case AArch64::LDRWpre:
4198 case AArch64::STRSpost:
4199 case AArch64::STRSpre:
4200 case AArch64::LDRSpost:
4201 case AArch64::LDRSpre:
4202 Scale = TypeSize::getFixed(1);
4203 Width = TypeSize::getFixed(4);
4204 MinOffset = -256;
4205 MaxOffset = 255;
4206 break;
4207 case AArch64::LDRHpost:
4208 case AArch64::LDRHpre:
4209 case AArch64::STRHpost:
4210 case AArch64::STRHpre:
4211 case AArch64::LDRHHpost:
4212 case AArch64::LDRHHpre:
4213 case AArch64::STRHHpost:
4214 case AArch64::STRHHpre:
4215 Scale = TypeSize::getFixed(1);
4216 Width = TypeSize::getFixed(2);
4217 MinOffset = -256;
4218 MaxOffset = 255;
4219 break;
4220 case AArch64::LDRBpost:
4221 case AArch64::LDRBpre:
4222 case AArch64::STRBpost:
4223 case AArch64::STRBpre:
4224 case AArch64::LDRBBpost:
4225 case AArch64::LDRBBpre:
4226 case AArch64::STRBBpost:
4227 case AArch64::STRBBpre:
4228 Scale = TypeSize::getFixed(1);
4229 Width = TypeSize::getFixed(1);
4230 MinOffset = -256;
4231 MaxOffset = 255;
4232 break;
4233 // Unscaled
4234 case AArch64::LDURQi:
4235 case AArch64::STURQi:
4236 Scale = TypeSize::getFixed(1);
4237 Width = TypeSize::getFixed(16);
4238 MinOffset = -256;
4239 MaxOffset = 255;
4240 break;
4241 case AArch64::LDURXi:
4242 case AArch64::LDURDi:
4243 case AArch64::LDAPURXi:
4244 case AArch64::STURXi:
4245 case AArch64::STURDi:
4246 case AArch64::STLURXi:
4247 case AArch64::PRFUMi:
4248 Scale = TypeSize::getFixed(1);
4249 Width = TypeSize::getFixed(8);
4250 MinOffset = -256;
4251 MaxOffset = 255;
4252 break;
4253 case AArch64::LDURWi:
4254 case AArch64::LDURSi:
4255 case AArch64::LDURSWi:
4256 case AArch64::LDAPURi:
4257 case AArch64::LDAPURSWi:
4258 case AArch64::STURWi:
4259 case AArch64::STURSi:
4260 case AArch64::STLURWi:
4261 Scale = TypeSize::getFixed(1);
4262 Width = TypeSize::getFixed(4);
4263 MinOffset = -256;
4264 MaxOffset = 255;
4265 break;
4266 case AArch64::LDURHi:
4267 case AArch64::LDURHHi:
4268 case AArch64::LDURSHXi:
4269 case AArch64::LDURSHWi:
4270 case AArch64::LDAPURHi:
4271 case AArch64::LDAPURSHWi:
4272 case AArch64::LDAPURSHXi:
4273 case AArch64::STURHi:
4274 case AArch64::STURHHi:
4275 case AArch64::STLURHi:
4276 Scale = TypeSize::getFixed(1);
4277 Width = TypeSize::getFixed(2);
4278 MinOffset = -256;
4279 MaxOffset = 255;
4280 break;
4281 case AArch64::LDURBi:
4282 case AArch64::LDURBBi:
4283 case AArch64::LDURSBXi:
4284 case AArch64::LDURSBWi:
4285 case AArch64::LDAPURBi:
4286 case AArch64::LDAPURSBWi:
4287 case AArch64::LDAPURSBXi:
4288 case AArch64::STURBi:
4289 case AArch64::STURBBi:
4290 case AArch64::STLURBi:
4291 Scale = TypeSize::getFixed(1);
4292 Width = TypeSize::getFixed(1);
4293 MinOffset = -256;
4294 MaxOffset = 255;
4295 break;
4296 // LDP / STP (including pre/post inc)
4297 case AArch64::LDPQi:
4298 case AArch64::LDNPQi:
4299 case AArch64::STPQi:
4300 case AArch64::STNPQi:
4301 case AArch64::LDPQpost:
4302 case AArch64::LDPQpre:
4303 case AArch64::STPQpost:
4304 case AArch64::STPQpre:
4305 Scale = TypeSize::getFixed(16);
4306 Width = TypeSize::getFixed(16 * 2);
4307 MinOffset = -64;
4308 MaxOffset = 63;
4309 break;
4310 case AArch64::LDPXi:
4311 case AArch64::LDPDi:
4312 case AArch64::LDNPXi:
4313 case AArch64::LDNPDi:
4314 case AArch64::STPXi:
4315 case AArch64::STPDi:
4316 case AArch64::STNPXi:
4317 case AArch64::STNPDi:
4318 case AArch64::LDPDpost:
4319 case AArch64::LDPDpre:
4320 case AArch64::LDPXpost:
4321 case AArch64::LDPXpre:
4322 case AArch64::STPDpost:
4323 case AArch64::STPDpre:
4324 case AArch64::STPXpost:
4325 case AArch64::STPXpre:
4326 Scale = TypeSize::getFixed(8);
4327 Width = TypeSize::getFixed(8 * 2);
4328 MinOffset = -64;
4329 MaxOffset = 63;
4330 break;
4331 case AArch64::LDPWi:
4332 case AArch64::LDPSi:
4333 case AArch64::LDNPWi:
4334 case AArch64::LDNPSi:
4335 case AArch64::STPWi:
4336 case AArch64::STPSi:
4337 case AArch64::STNPWi:
4338 case AArch64::STNPSi:
4339 case AArch64::LDPSpost:
4340 case AArch64::LDPSpre:
4341 case AArch64::LDPWpost:
4342 case AArch64::LDPWpre:
4343 case AArch64::STPSpost:
4344 case AArch64::STPSpre:
4345 case AArch64::STPWpost:
4346 case AArch64::STPWpre:
4347 Scale = TypeSize::getFixed(4);
4348 Width = TypeSize::getFixed(4 * 2);
4349 MinOffset = -64;
4350 MaxOffset = 63;
4351 break;
4352 case AArch64::StoreSwiftAsyncContext:
4353 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4354 Scale = TypeSize::getFixed(1);
4355 Width = TypeSize::getFixed(8);
4356 MinOffset = 0;
4357 MaxOffset = 4095;
4358 break;
4359 case AArch64::ADDG:
4360 Scale = TypeSize::getFixed(16);
4361 Width = TypeSize::getFixed(0);
4362 MinOffset = 0;
4363 MaxOffset = 63;
4364 break;
4365 case AArch64::TAGPstack:
4366 Scale = TypeSize::getFixed(16);
4367 Width = TypeSize::getFixed(0);
4368 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4369 // of 63 (not 64!).
4370 MinOffset = -63;
4371 MaxOffset = 63;
4372 break;
4373 case AArch64::LDG:
4374 case AArch64::STGi:
4375 case AArch64::STGPreIndex:
4376 case AArch64::STGPostIndex:
4377 case AArch64::STZGi:
4378 case AArch64::STZGPreIndex:
4379 case AArch64::STZGPostIndex:
4380 Scale = TypeSize::getFixed(16);
4381 Width = TypeSize::getFixed(16);
4382 MinOffset = -256;
4383 MaxOffset = 255;
4384 break;
4385 // SVE
4386 case AArch64::STR_ZZZZXI:
4387 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4388 case AArch64::LDR_ZZZZXI:
4389 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4390 Scale = TypeSize::getScalable(16);
4391 Width = TypeSize::getScalable(16 * 4);
4392 MinOffset = -256;
4393 MaxOffset = 252;
4394 break;
4395 case AArch64::STR_ZZZXI:
4396 case AArch64::LDR_ZZZXI:
4397 Scale = TypeSize::getScalable(16);
4398 Width = TypeSize::getScalable(16 * 3);
4399 MinOffset = -256;
4400 MaxOffset = 253;
4401 break;
4402 case AArch64::STR_ZZXI:
4403 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4404 case AArch64::LDR_ZZXI:
4405 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4406 Scale = TypeSize::getScalable(16);
4407 Width = TypeSize::getScalable(16 * 2);
4408 MinOffset = -256;
4409 MaxOffset = 254;
4410 break;
4411 case AArch64::LDR_PXI:
4412 case AArch64::STR_PXI:
4413 Scale = TypeSize::getScalable(2);
4414 Width = TypeSize::getScalable(2);
4415 MinOffset = -256;
4416 MaxOffset = 255;
4417 break;
4418 case AArch64::LDR_PPXI:
4419 case AArch64::STR_PPXI:
4420 Scale = TypeSize::getScalable(2);
4421 Width = TypeSize::getScalable(2 * 2);
4422 MinOffset = -256;
4423 MaxOffset = 254;
4424 break;
4425 case AArch64::LDR_ZXI:
4426 case AArch64::STR_ZXI:
4427 Scale = TypeSize::getScalable(16);
4428 Width = TypeSize::getScalable(16);
4429 MinOffset = -256;
4430 MaxOffset = 255;
4431 break;
4432 case AArch64::LD1B_IMM:
4433 case AArch64::LD1H_IMM:
4434 case AArch64::LD1W_IMM:
4435 case AArch64::LD1D_IMM:
4436 case AArch64::LDNT1B_ZRI:
4437 case AArch64::LDNT1H_ZRI:
4438 case AArch64::LDNT1W_ZRI:
4439 case AArch64::LDNT1D_ZRI:
4440 case AArch64::ST1B_IMM:
4441 case AArch64::ST1H_IMM:
4442 case AArch64::ST1W_IMM:
4443 case AArch64::ST1D_IMM:
4444 case AArch64::STNT1B_ZRI:
4445 case AArch64::STNT1H_ZRI:
4446 case AArch64::STNT1W_ZRI:
4447 case AArch64::STNT1D_ZRI:
4448 case AArch64::LDNF1B_IMM:
4449 case AArch64::LDNF1H_IMM:
4450 case AArch64::LDNF1W_IMM:
4451 case AArch64::LDNF1D_IMM:
4452 // A full vector's worth of data
4453 // Width = mbytes * elements
4454 Scale = TypeSize::getScalable(16);
4455 Width = TypeSize::getScalable(16);
4456 MinOffset = -8;
4457 MaxOffset = 7;
4458 break;
4459 case AArch64::LD2B_IMM:
4460 case AArch64::LD2H_IMM:
4461 case AArch64::LD2W_IMM:
4462 case AArch64::LD2D_IMM:
4463 case AArch64::ST2B_IMM:
4464 case AArch64::ST2H_IMM:
4465 case AArch64::ST2W_IMM:
4466 case AArch64::ST2D_IMM:
4467 Scale = TypeSize::getScalable(32);
4468 Width = TypeSize::getScalable(16 * 2);
4469 MinOffset = -8;
4470 MaxOffset = 7;
4471 break;
4472 case AArch64::LD3B_IMM:
4473 case AArch64::LD3H_IMM:
4474 case AArch64::LD3W_IMM:
4475 case AArch64::LD3D_IMM:
4476 case AArch64::ST3B_IMM:
4477 case AArch64::ST3H_IMM:
4478 case AArch64::ST3W_IMM:
4479 case AArch64::ST3D_IMM:
4480 Scale = TypeSize::getScalable(48);
4481 Width = TypeSize::getScalable(16 * 3);
4482 MinOffset = -8;
4483 MaxOffset = 7;
4484 break;
4485 case AArch64::LD4B_IMM:
4486 case AArch64::LD4H_IMM:
4487 case AArch64::LD4W_IMM:
4488 case AArch64::LD4D_IMM:
4489 case AArch64::ST4B_IMM:
4490 case AArch64::ST4H_IMM:
4491 case AArch64::ST4W_IMM:
4492 case AArch64::ST4D_IMM:
4493 Scale = TypeSize::getScalable(64);
4494 Width = TypeSize::getScalable(16 * 4);
4495 MinOffset = -8;
4496 MaxOffset = 7;
4497 break;
4498 case AArch64::LD1B_H_IMM:
4499 case AArch64::LD1SB_H_IMM:
4500 case AArch64::LD1H_S_IMM:
4501 case AArch64::LD1SH_S_IMM:
4502 case AArch64::LD1W_D_IMM:
4503 case AArch64::LD1SW_D_IMM:
4504 case AArch64::ST1B_H_IMM:
4505 case AArch64::ST1H_S_IMM:
4506 case AArch64::ST1W_D_IMM:
4507 case AArch64::LDNF1B_H_IMM:
4508 case AArch64::LDNF1SB_H_IMM:
4509 case AArch64::LDNF1H_S_IMM:
4510 case AArch64::LDNF1SH_S_IMM:
4511 case AArch64::LDNF1W_D_IMM:
4512 case AArch64::LDNF1SW_D_IMM:
4513 // A half vector's worth of data
4514 // Width = mbytes * elements
4515 Scale = TypeSize::getScalable(8);
4516 Width = TypeSize::getScalable(8);
4517 MinOffset = -8;
4518 MaxOffset = 7;
4519 break;
4520 case AArch64::LD1B_S_IMM:
4521 case AArch64::LD1SB_S_IMM:
4522 case AArch64::LD1H_D_IMM:
4523 case AArch64::LD1SH_D_IMM:
4524 case AArch64::ST1B_S_IMM:
4525 case AArch64::ST1H_D_IMM:
4526 case AArch64::LDNF1B_S_IMM:
4527 case AArch64::LDNF1SB_S_IMM:
4528 case AArch64::LDNF1H_D_IMM:
4529 case AArch64::LDNF1SH_D_IMM:
4530 // A quarter vector's worth of data
4531 // Width = mbytes * elements
4532 Scale = TypeSize::getScalable(4);
4533 Width = TypeSize::getScalable(4);
4534 MinOffset = -8;
4535 MaxOffset = 7;
4536 break;
4537 case AArch64::LD1B_D_IMM:
4538 case AArch64::LD1SB_D_IMM:
4539 case AArch64::ST1B_D_IMM:
4540 case AArch64::LDNF1B_D_IMM:
4541 case AArch64::LDNF1SB_D_IMM:
4542 // An eighth vector's worth of data
4543 // Width = mbytes * elements
4544 Scale = TypeSize::getScalable(2);
4545 Width = TypeSize::getScalable(2);
4546 MinOffset = -8;
4547 MaxOffset = 7;
4548 break;
4549 case AArch64::ST2Gi:
4550 case AArch64::ST2GPreIndex:
4551 case AArch64::ST2GPostIndex:
4552 case AArch64::STZ2Gi:
4553 case AArch64::STZ2GPreIndex:
4554 case AArch64::STZ2GPostIndex:
4555 Scale = TypeSize::getFixed(16);
4556 Width = TypeSize::getFixed(32);
4557 MinOffset = -256;
4558 MaxOffset = 255;
4559 break;
4560 case AArch64::STGPi:
4561 case AArch64::STGPpost:
4562 case AArch64::STGPpre:
4563 Scale = TypeSize::getFixed(16);
4564 Width = TypeSize::getFixed(16);
4565 MinOffset = -64;
4566 MaxOffset = 63;
4567 break;
4568 case AArch64::LD1RB_IMM:
4569 case AArch64::LD1RB_H_IMM:
4570 case AArch64::LD1RB_S_IMM:
4571 case AArch64::LD1RB_D_IMM:
4572 case AArch64::LD1RSB_H_IMM:
4573 case AArch64::LD1RSB_S_IMM:
4574 case AArch64::LD1RSB_D_IMM:
4575 Scale = TypeSize::getFixed(1);
4576 Width = TypeSize::getFixed(1);
4577 MinOffset = 0;
4578 MaxOffset = 63;
4579 break;
4580 case AArch64::LD1RH_IMM:
4581 case AArch64::LD1RH_S_IMM:
4582 case AArch64::LD1RH_D_IMM:
4583 case AArch64::LD1RSH_S_IMM:
4584 case AArch64::LD1RSH_D_IMM:
4585 Scale = TypeSize::getFixed(2);
4586 Width = TypeSize::getFixed(2);
4587 MinOffset = 0;
4588 MaxOffset = 63;
4589 break;
4590 case AArch64::LD1RW_IMM:
4591 case AArch64::LD1RW_D_IMM:
4592 case AArch64::LD1RSW_IMM:
4593 Scale = TypeSize::getFixed(4);
4594 Width = TypeSize::getFixed(4);
4595 MinOffset = 0;
4596 MaxOffset = 63;
4597 break;
4598 case AArch64::LD1RD_IMM:
4599 Scale = TypeSize::getFixed(8);
4600 Width = TypeSize::getFixed(8);
4601 MinOffset = 0;
4602 MaxOffset = 63;
4603 break;
4604 }
4605
4606 return true;
4607}
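// For example, LDPXi has Scale 8 and a signed immediate in [-64, 63], so it
// can address byte offsets from -512 to 504 in multiples of 8, while LDURXi
// (Scale 1) covers any byte offset in [-256, 255].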
4608
4609// Scaling factor for unscaled load or store.
4610 int AArch64InstrInfo::getMemScale(unsigned Opc) {
4611 switch (Opc) {
4612 default:
4613 llvm_unreachable("Opcode has unknown scale!");
4614 case AArch64::LDRBBui:
4615 case AArch64::LDURBBi:
4616 case AArch64::LDRSBWui:
4617 case AArch64::LDURSBWi:
4618 case AArch64::STRBBui:
4619 case AArch64::STURBBi:
4620 return 1;
4621 case AArch64::LDRHHui:
4622 case AArch64::LDURHHi:
4623 case AArch64::LDRSHWui:
4624 case AArch64::LDURSHWi:
4625 case AArch64::STRHHui:
4626 case AArch64::STURHHi:
4627 return 2;
4628 case AArch64::LDRSui:
4629 case AArch64::LDURSi:
4630 case AArch64::LDRSpre:
4631 case AArch64::LDRSWui:
4632 case AArch64::LDURSWi:
4633 case AArch64::LDRSWpre:
4634 case AArch64::LDRWpre:
4635 case AArch64::LDRWui:
4636 case AArch64::LDURWi:
4637 case AArch64::STRSui:
4638 case AArch64::STURSi:
4639 case AArch64::STRSpre:
4640 case AArch64::STRWui:
4641 case AArch64::STURWi:
4642 case AArch64::STRWpre:
4643 case AArch64::LDPSi:
4644 case AArch64::LDPSWi:
4645 case AArch64::LDPWi:
4646 case AArch64::STPSi:
4647 case AArch64::STPWi:
4648 return 4;
4649 case AArch64::LDRDui:
4650 case AArch64::LDURDi:
4651 case AArch64::LDRDpre:
4652 case AArch64::LDRXui:
4653 case AArch64::LDURXi:
4654 case AArch64::LDRXpre:
4655 case AArch64::STRDui:
4656 case AArch64::STURDi:
4657 case AArch64::STRDpre:
4658 case AArch64::STRXui:
4659 case AArch64::STURXi:
4660 case AArch64::STRXpre:
4661 case AArch64::LDPDi:
4662 case AArch64::LDPXi:
4663 case AArch64::STPDi:
4664 case AArch64::STPXi:
4665 return 8;
4666 case AArch64::LDRQui:
4667 case AArch64::LDURQi:
4668 case AArch64::STRQui:
4669 case AArch64::STURQi:
4670 case AArch64::STRQpre:
4671 case AArch64::LDPQi:
4672 case AArch64::LDRQpre:
4673 case AArch64::STPQi:
4674 case AArch64::STGi:
4675 case AArch64::STZGi:
4676 case AArch64::ST2Gi:
4677 case AArch64::STZ2Gi:
4678 case AArch64::STGPi:
4679 return 16;
4680 }
4681}
4682
4683 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4684 switch (MI.getOpcode()) {
4685 default:
4686 return false;
4687 case AArch64::LDRWpre:
4688 case AArch64::LDRXpre:
4689 case AArch64::LDRSWpre:
4690 case AArch64::LDRSpre:
4691 case AArch64::LDRDpre:
4692 case AArch64::LDRQpre:
4693 return true;
4694 }
4695}
4696
4697 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4698 switch (MI.getOpcode()) {
4699 default:
4700 return false;
4701 case AArch64::STRWpre:
4702 case AArch64::STRXpre:
4703 case AArch64::STRSpre:
4704 case AArch64::STRDpre:
4705 case AArch64::STRQpre:
4706 return true;
4707 }
4708}
4709
4710 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4711 return isPreLd(MI) || isPreSt(MI);
4712}
4713
4714 bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4715 switch (MI.getOpcode()) {
4716 default:
4717 return false;
4718 case AArch64::LDPSi:
4719 case AArch64::LDPSWi:
4720 case AArch64::LDPDi:
4721 case AArch64::LDPQi:
4722 case AArch64::LDPWi:
4723 case AArch64::LDPXi:
4724 case AArch64::STPSi:
4725 case AArch64::STPDi:
4726 case AArch64::STPQi:
4727 case AArch64::STPWi:
4728 case AArch64::STPXi:
4729 case AArch64::STGPi:
4730 return true;
4731 }
4732}
4733
4734 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4735 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4736 unsigned Idx =
4737 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4738 : 1;
4739 return MI.getOperand(Idx);
4740}
4741
4742const MachineOperand &
4743 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4744 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4745 unsigned Idx =
4746 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4747 : 2;
4748 return MI.getOperand(Idx);
4749}
4750
4751const MachineOperand &
4752 AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4753 switch (MI.getOpcode()) {
4754 default:
4755 llvm_unreachable("Unexpected opcode");
4756 case AArch64::LDRBroX:
4757 case AArch64::LDRBBroX:
4758 case AArch64::LDRSBXroX:
4759 case AArch64::LDRSBWroX:
4760 case AArch64::LDRHroX:
4761 case AArch64::LDRHHroX:
4762 case AArch64::LDRSHXroX:
4763 case AArch64::LDRSHWroX:
4764 case AArch64::LDRWroX:
4765 case AArch64::LDRSroX:
4766 case AArch64::LDRSWroX:
4767 case AArch64::LDRDroX:
4768 case AArch64::LDRXroX:
4769 case AArch64::LDRQroX:
4770 return MI.getOperand(4);
4771 }
4772}
4773
4774 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4775 Register Reg) {
4776 if (MI.getParent() == nullptr)
4777 return nullptr;
4778 const MachineFunction *MF = MI.getParent()->getParent();
4779 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4780}
4781
4782 bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4783 auto IsHFPR = [&](const MachineOperand &Op) {
4784 if (!Op.isReg())
4785 return false;
4786 auto Reg = Op.getReg();
4787 if (Reg.isPhysical())
4788 return AArch64::FPR16RegClass.contains(Reg);
4789 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4790 return TRC == &AArch64::FPR16RegClass ||
4791 TRC == &AArch64::FPR16_loRegClass;
4792 };
4793 return llvm::any_of(MI.operands(), IsHFPR);
4794}
4795
4796 bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4797 auto IsQFPR = [&](const MachineOperand &Op) {
4798 if (!Op.isReg())
4799 return false;
4800 auto Reg = Op.getReg();
4801 if (Reg.isPhysical())
4802 return AArch64::FPR128RegClass.contains(Reg);
4803 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4804 return TRC == &AArch64::FPR128RegClass ||
4805 TRC == &AArch64::FPR128_loRegClass;
4806 };
4807 return llvm::any_of(MI.operands(), IsQFPR);
4808}
4809
4810 bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4811 switch (MI.getOpcode()) {
4812 case AArch64::BRK:
4813 case AArch64::HLT:
4814 case AArch64::PACIASP:
4815 case AArch64::PACIBSP:
4816 // Implicit BTI behavior.
4817 return true;
4818 case AArch64::PAUTH_PROLOGUE:
4819 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4820 return true;
4821 case AArch64::HINT: {
4822 unsigned Imm = MI.getOperand(0).getImm();
4823 // Explicit BTI instruction.
4824 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4825 return true;
4826 // PACI(A|B)SP instructions.
4827 if (Imm == 25 || Imm == 27)
4828 return true;
4829 return false;
4830 }
4831 default:
4832 return false;
4833 }
4834}
4835
4836 bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4837 if (Reg == 0)
4838 return false;
4839 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4840 return AArch64::FPR128RegClass.contains(Reg) ||
4841 AArch64::FPR64RegClass.contains(Reg) ||
4842 AArch64::FPR32RegClass.contains(Reg) ||
4843 AArch64::FPR16RegClass.contains(Reg) ||
4844 AArch64::FPR8RegClass.contains(Reg);
4845}
4846
4847 bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4848 auto IsFPR = [&](const MachineOperand &Op) {
4849 if (!Op.isReg())
4850 return false;
4851 auto Reg = Op.getReg();
4852 if (Reg.isPhysical())
4853 return isFpOrNEON(Reg);
4854
4855 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4856 return TRC == &AArch64::FPR128RegClass ||
4857 TRC == &AArch64::FPR128_loRegClass ||
4858 TRC == &AArch64::FPR64RegClass ||
4859 TRC == &AArch64::FPR64_loRegClass ||
4860 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4861 TRC == &AArch64::FPR8RegClass;
4862 };
4863 return llvm::any_of(MI.operands(), IsFPR);
4864}
4865
4866// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4867// scaled.
4868static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4869 int Scale = AArch64InstrInfo::getMemScale(Opc);
4870
4871 // If the byte-offset isn't a multiple of the stride, we can't scale this
4872 // offset.
4873 if (Offset % Scale != 0)
4874 return false;
4875
4876 // Convert the byte-offset used by unscaled into an "element" offset used
4877 // by the scaled pair load/store instructions.
4878 Offset /= Scale;
4879 return true;
4880}
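// E.g. for STURXi (an 8-byte store) a byte offset of 24 scales to element
// offset 3, whereas a byte offset of 20 is not a multiple of 8 and cannot be
// scaled, so the pair candidate is rejected.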
4881
4882static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4883 if (FirstOpc == SecondOpc)
4884 return true;
4885 // We can also pair sign-ext and zero-ext instructions.
4886 switch (FirstOpc) {
4887 default:
4888 return false;
4889 case AArch64::STRSui:
4890 case AArch64::STURSi:
4891 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4892 case AArch64::STRDui:
4893 case AArch64::STURDi:
4894 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4895 case AArch64::STRQui:
4896 case AArch64::STURQi:
4897 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4898 case AArch64::STRWui:
4899 case AArch64::STURWi:
4900 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4901 case AArch64::STRXui:
4902 case AArch64::STURXi:
4903 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4904 case AArch64::LDRSui:
4905 case AArch64::LDURSi:
4906 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4907 case AArch64::LDRDui:
4908 case AArch64::LDURDi:
4909 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4910 case AArch64::LDRQui:
4911 case AArch64::LDURQi:
4912 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4913 case AArch64::LDRWui:
4914 case AArch64::LDURWi:
4915 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4916 case AArch64::LDRSWui:
4917 case AArch64::LDURSWi:
4918 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4919 case AArch64::LDRXui:
4920 case AArch64::LDURXi:
4921 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4922 }
4923 // These instructions can't be paired based on their opcodes.
4924 return false;
4925}
4926
4927static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4928 int64_t Offset1, unsigned Opcode1, int FI2,
4929 int64_t Offset2, unsigned Opcode2) {
4930 // Accesses through fixed stack object frame indices may access a different
4931 // fixed stack slot. Check that the object offsets + offsets match.
4932 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4933 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4934 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4935 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4936 // Convert to scaled object offsets.
4937 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4938 if (ObjectOffset1 % Scale1 != 0)
4939 return false;
4940 ObjectOffset1 /= Scale1;
4941 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4942 if (ObjectOffset2 % Scale2 != 0)
4943 return false;
4944 ObjectOffset2 /= Scale2;
4945 ObjectOffset1 += Offset1;
4946 ObjectOffset2 += Offset2;
4947 return ObjectOffset1 + 1 == ObjectOffset2;
4948 }
4949
4950 return FI1 == FI2;
4951}
4952
4953/// Detect opportunities for ldp/stp formation.
4954///
4955/// Only called for LdSt for which getMemOperandWithOffset returns true.
4956 bool AArch64InstrInfo::shouldClusterMemOps(
4957 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4958 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4959 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4960 unsigned NumBytes) const {
4961 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4962 const MachineOperand &BaseOp1 = *BaseOps1.front();
4963 const MachineOperand &BaseOp2 = *BaseOps2.front();
4964 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4965 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4966 if (BaseOp1.getType() != BaseOp2.getType())
4967 return false;
4968
4969 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4970 "Only base registers and frame indices are supported.");
4971
4972 // Check for both base regs and base FI.
4973 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4974 return false;
4975
4976 // Only cluster up to a single pair.
4977 if (ClusterSize > 2)
4978 return false;
4979
4980 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4981 return false;
4982
4983 // Can we pair these instructions based on their opcodes?
4984 unsigned FirstOpc = FirstLdSt.getOpcode();
4985 unsigned SecondOpc = SecondLdSt.getOpcode();
4986 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4987 return false;
4988
4989 // Can't merge volatiles or load/stores that have a hint to avoid pair
4990 // formation, for example.
4991 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4992 !isCandidateToMergeOrPair(SecondLdSt))
4993 return false;
4994
4995 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4996 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4997 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4998 return false;
4999
5000 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
5001 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5002 return false;
5003
5004 // Pairwise instructions have a 7-bit signed offset field.
5005 if (Offset1 > 63 || Offset1 < -64)
5006 return false;
5007
5008 // The caller should already have ordered First/SecondLdSt by offset.
5009 // Note: except for non-equal frame index bases
5010 if (BaseOp1.isFI()) {
5011 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5012 "Caller should have ordered offsets.");
5013
5014 const MachineFrameInfo &MFI =
5015 FirstLdSt.getParent()->getParent()->getFrameInfo();
5016 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5017 BaseOp2.getIndex(), Offset2, SecondOpc);
5018 }
5019
5020 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5021
5022 return Offset1 + 1 == Offset2;
5023}
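// Example: two LDRXui loads from [x0, #0] and [x0, #8] have scaled offsets 0
// and 1, so they are clustered here and can later be combined into
// `ldp x1, x2, [x0]` when the pair is actually formed.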
5024
5025 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
5026 MCRegister Reg, unsigned SubIdx,
5027 unsigned State,
5028 const TargetRegisterInfo *TRI) {
5029 if (!SubIdx)
5030 return MIB.addReg(Reg, State);
5031
5032 if (Reg.isPhysical())
5033 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5034 return MIB.addReg(Reg, State, SubIdx);
5035}
5036
5037static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5038 unsigned NumRegs) {
5039 // We really want the positive remainder mod 32 here, which happens to be
5040 // easily obtainable with a mask.
5041 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5042}
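// E.g. copying the tuple D1_D2_D3 into D3_D4_D5 gives (3 - 1) & 0x1f = 2,
// which is < 3, so the per-subregister copies are emitted in reverse order to
// avoid overwriting D3 before it has been read.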
5043
5044 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5045 MachineBasicBlock::iterator I,
5046 const DebugLoc &DL, MCRegister DestReg,
5047 MCRegister SrcReg, bool KillSrc,
5048 unsigned Opcode,
5049 ArrayRef<unsigned> Indices) const {
5050 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5051 const TargetRegisterInfo *TRI = &getRegisterInfo();
5052 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5053 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5054 unsigned NumRegs = Indices.size();
5055
5056 int SubReg = 0, End = NumRegs, Incr = 1;
5057 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5058 SubReg = NumRegs - 1;
5059 End = -1;
5060 Incr = -1;
5061 }
5062
5063 for (; SubReg != End; SubReg += Incr) {
5064 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5065 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5066 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5067 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5068 }
5069}
5070
5071 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5072 MachineBasicBlock::iterator I,
5073 const DebugLoc &DL, MCRegister DestReg,
5074 MCRegister SrcReg, bool KillSrc,
5075 unsigned Opcode, unsigned ZeroReg,
5076 llvm::ArrayRef<unsigned> Indices) const {
5077 const TargetRegisterInfo *TRI = &getRegisterInfo();
5078 unsigned NumRegs = Indices.size();
5079
5080#ifndef NDEBUG
5081 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5082 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5083 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5084 "GPR reg sequences should not be able to overlap");
5085#endif
5086
5087 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5088 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5089 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5090 MIB.addReg(ZeroReg);
5091 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5092 MIB.addImm(0);
5093 }
5094}
5095
5096 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5097 MachineBasicBlock::iterator I,
5098 const DebugLoc &DL, Register DestReg,
5099 Register SrcReg, bool KillSrc,
5100 bool RenamableDest,
5101 bool RenamableSrc) const {
5102 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5103 AArch64::GPR32spRegClass.contains(SrcReg)) {
5104 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5105 // If either operand is WSP, expand to ADD #0.
5106 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5107 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5108 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5109 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5110 &AArch64::GPR64spRegClass);
5111 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5112 &AArch64::GPR64spRegClass);
5113 // This instruction is reading and writing X registers. This may upset
5114 // the register scavenger and machine verifier, so we need to indicate
5115 // that we are reading an undefined value from SrcRegX, but a proper
5116 // value from SrcReg.
5117 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5118 .addReg(SrcRegX, RegState::Undef)
5119 .addImm(0)
5120 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5121 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5122 } else {
5123 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5124 .addReg(SrcReg, getKillRegState(KillSrc))
5125 .addImm(0)
5126 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5127 }
5128 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5129 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5130 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5131 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5132 &AArch64::GPR64spRegClass);
5133 assert(DestRegX.isValid() && "Destination super-reg not valid");
5134 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5135 &AArch64::GPR64spRegClass);
5136 assert(SrcRegX.isValid() && "Source super-reg not valid");
5137 // This instruction is reading and writing X registers. This may upset
5138 // the register scavenger and machine verifier, so we need to indicate
5139 // that we are reading an undefined value from SrcRegX, but a proper
5140 // value from SrcReg.
5141 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5142 .addReg(AArch64::XZR)
5143 .addReg(SrcRegX, RegState::Undef)
5144 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5145 } else {
5146 // Otherwise, expand to ORR WZR.
5147 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5148 .addReg(AArch64::WZR)
5149 .addReg(SrcReg, getKillRegState(KillSrc));
5150 }
5151 return;
5152 }
5153
5154 // GPR32 zeroing
5155 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5156 if (Subtarget.hasZeroCycleZeroingGPR64() &&
5157 !Subtarget.hasZeroCycleZeroingGPR32()) {
5158 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5159 &AArch64::GPR64spRegClass);
5160 assert(DestRegX.isValid() && "Destination super-reg not valid");
5161 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
5162 .addImm(0)
5163 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5164 } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
5165 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5166 .addImm(0)
5167 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5168 } else {
5169 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5170 .addReg(AArch64::WZR)
5171 .addReg(AArch64::WZR);
5172 }
5173 return;
5174 }
5175
5176 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5177 AArch64::GPR64spRegClass.contains(SrcReg)) {
5178 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5179 // If either operand is SP, expand to ADD #0.
5180 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5181 .addReg(SrcReg, getKillRegState(KillSrc))
5182 .addImm(0)
5183 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5184 } else {
5185 // Otherwise, expand to ORR XZR.
5186 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5187 .addReg(AArch64::XZR)
5188 .addReg(SrcReg, getKillRegState(KillSrc));
5189 }
5190 return;
5191 }
5192
5193 // GPR64 zeroing
5194 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5195 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5196 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5197 .addImm(0)
5198 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5199 } else {
5200 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5201 .addReg(AArch64::XZR)
5202 .addReg(AArch64::XZR);
5203 }
5204 return;
5205 }
5206
5207 // Copy a Predicate register by ORRing with itself.
5208 if (AArch64::PPRRegClass.contains(DestReg) &&
5209 AArch64::PPRRegClass.contains(SrcReg)) {
5210 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5211 "Unexpected SVE register.");
5212 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5213 .addReg(SrcReg) // Pg
5214 .addReg(SrcReg)
5215 .addReg(SrcReg, getKillRegState(KillSrc));
5216 return;
5217 }
5218
5219 // Copy a predicate-as-counter register by ORRing with itself as if it
5220 // were a regular predicate (mask) register.
5221 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5222 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5223 if (DestIsPNR || SrcIsPNR) {
5224 auto ToPPR = [](MCRegister R) -> MCRegister {
5225 return (R - AArch64::PN0) + AArch64::P0;
5226 };
5227 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5228 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5229
5230 if (PPRSrcReg != PPRDestReg) {
5231 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5232 .addReg(PPRSrcReg) // Pg
5233 .addReg(PPRSrcReg)
5234 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5235 if (DestIsPNR)
5236 NewMI.addDef(DestReg, RegState::Implicit);
5237 }
5238 return;
5239 }
5240
5241 // Copy a Z register by ORRing with itself.
5242 if (AArch64::ZPRRegClass.contains(DestReg) &&
5243 AArch64::ZPRRegClass.contains(SrcReg)) {
5244 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5245 "Unexpected SVE register.");
5246 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5247 .addReg(SrcReg)
5248 .addReg(SrcReg, getKillRegState(KillSrc));
5249 return;
5250 }
5251
5252 // Copy a Z register pair by copying the individual sub-registers.
5253 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5254 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5255 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5256 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5257 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5258 "Unexpected SVE register.");
5259 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5260 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5261 Indices);
5262 return;
5263 }
5264
5265 // Copy a Z register triple by copying the individual sub-registers.
5266 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5267 AArch64::ZPR3RegClass.contains(SrcReg)) {
5268 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5269 "Unexpected SVE register.");
5270 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5271 AArch64::zsub2};
5272 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5273 Indices);
5274 return;
5275 }
5276
5277 // Copy a Z register quad by copying the individual sub-registers.
5278 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5279 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5280 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5281 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5282 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5283 "Unexpected SVE register.");
5284 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5285 AArch64::zsub2, AArch64::zsub3};
5286 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5287 Indices);
5288 return;
5289 }
5290
5291 // Copy a DDDD register quad by copying the individual sub-registers.
5292 if (AArch64::DDDDRegClass.contains(DestReg) &&
5293 AArch64::DDDDRegClass.contains(SrcReg)) {
5294 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5295 AArch64::dsub2, AArch64::dsub3};
5296 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5297 Indices);
5298 return;
5299 }
5300
5301 // Copy a DDD register triple by copying the individual sub-registers.
5302 if (AArch64::DDDRegClass.contains(DestReg) &&
5303 AArch64::DDDRegClass.contains(SrcReg)) {
5304 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5305 AArch64::dsub2};
5306 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5307 Indices);
5308 return;
5309 }
5310
5311 // Copy a DD register pair by copying the individual sub-registers.
5312 if (AArch64::DDRegClass.contains(DestReg) &&
5313 AArch64::DDRegClass.contains(SrcReg)) {
5314 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5315 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5316 Indices);
5317 return;
5318 }
5319
5320 // Copy a QQQQ register quad by copying the individual sub-registers.
5321 if (AArch64::QQQQRegClass.contains(DestReg) &&
5322 AArch64::QQQQRegClass.contains(SrcReg)) {
5323 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5324 AArch64::qsub2, AArch64::qsub3};
5325 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5326 Indices);
5327 return;
5328 }
5329
5330 // Copy a QQQ register triple by copying the individual sub-registers.
5331 if (AArch64::QQQRegClass.contains(DestReg) &&
5332 AArch64::QQQRegClass.contains(SrcReg)) {
5333 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5334 AArch64::qsub2};
5335 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5336 Indices);
5337 return;
5338 }
5339
5340 // Copy a QQ register pair by copying the individual sub-registers.
5341 if (AArch64::QQRegClass.contains(DestReg) &&
5342 AArch64::QQRegClass.contains(SrcReg)) {
5343 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5344 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5345 Indices);
5346 return;
5347 }
5348
5349 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5350 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5351 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5352 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5353 AArch64::XZR, Indices);
5354 return;
5355 }
5356
5357 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5358 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5359 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5360 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5361 AArch64::WZR, Indices);
5362 return;
5363 }
5364
5365 if (AArch64::FPR128RegClass.contains(DestReg) &&
5366 AArch64::FPR128RegClass.contains(SrcReg)) {
5367 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5368 !Subtarget.isNeonAvailable())
5369 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5370 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5371 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5372 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5373 else if (Subtarget.isNeonAvailable())
5374 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5375 .addReg(SrcReg)
5376 .addReg(SrcReg, getKillRegState(KillSrc));
5377 else {
5378 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5379 .addReg(AArch64::SP, RegState::Define)
5380 .addReg(SrcReg, getKillRegState(KillSrc))
5381 .addReg(AArch64::SP)
5382 .addImm(-16);
5383 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5384 .addReg(AArch64::SP, RegState::Define)
5385 .addReg(DestReg, RegState::Define)
5386 .addReg(AArch64::SP)
5387 .addImm(16);
5388 }
5389 return;
5390 }
5391
5392 if (AArch64::FPR64RegClass.contains(DestReg) &&
5393 AArch64::FPR64RegClass.contains(SrcReg)) {
5394 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5395 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5396 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5397 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
5398 &AArch64::FPR128RegClass);
5399 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
5400 &AArch64::FPR128RegClass);
5401 // This instruction is reading and writing Q registers. This may upset
5402 // the register scavenger and machine verifier, so we need to indicate
5403 // that we are reading an undefined value from SrcRegQ, but a proper
5404 // value from SrcReg.
5405 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5406 .addReg(SrcRegQ, RegState::Undef)
5407 .addReg(SrcRegQ, RegState::Undef)
5408 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5409 } else {
5410 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5411 .addReg(SrcReg, getKillRegState(KillSrc));
5412 }
5413 return;
5414 }
5415
5416 if (AArch64::FPR32RegClass.contains(DestReg) &&
5417 AArch64::FPR32RegClass.contains(SrcReg)) {
5418 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5419 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5420 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5421 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5422 &AArch64::FPR128RegClass);
5423 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5424 &AArch64::FPR128RegClass);
5425 // This instruction is reading and writing Q registers. This may upset
5426 // the register scavenger and machine verifier, so we need to indicate
5427 // that we are reading an undefined value from SrcRegQ, but a proper
5428 // value from SrcReg.
5429 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5430 .addReg(SrcRegQ, RegState::Undef)
5431 .addReg(SrcRegQ, RegState::Undef)
5432 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5433 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5434 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5435 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5436 &AArch64::FPR64RegClass);
5437 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5438 &AArch64::FPR64RegClass);
5439 // This instruction is reading and writing D registers. This may upset
5440 // the register scavenger and machine verifier, so we need to indicate
5441 // that we are reading an undefined value from SrcRegD, but a proper
5442 // value from SrcReg.
5443 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5444 .addReg(SrcRegD, RegState::Undef)
5445 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5446 } else {
5447 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5448 .addReg(SrcReg, getKillRegState(KillSrc));
5449 }
5450 return;
5451 }
5452
5453 if (AArch64::FPR16RegClass.contains(DestReg) &&
5454 AArch64::FPR16RegClass.contains(SrcReg)) {
5455 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5456 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5457 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5458 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5459 &AArch64::FPR128RegClass);
5460 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5461 &AArch64::FPR128RegClass);
5462 // This instruction is reading and writing Q registers. This may upset
5463 // the register scavenger and machine verifier, so we need to indicate
5464 // that we are reading an undefined value from SrcRegQ, but a proper
5465 // value from SrcReg.
5466 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5467 .addReg(SrcRegQ, RegState::Undef)
5468 .addReg(SrcRegQ, RegState::Undef)
5469 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5470 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5471 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5472 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5473 &AArch64::FPR64RegClass);
5474 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5475 &AArch64::FPR64RegClass);
5476 // This instruction is reading and writing D registers. This may upset
5477 // the register scavenger and machine verifier, so we need to indicate
5478 // that we are reading an undefined value from SrcRegD, but a proper
5479 // value from SrcReg.
5480 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5481 .addReg(SrcRegD, RegState::Undef)
5482 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5483 } else {
5484 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5485 &AArch64::FPR32RegClass);
5486 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5487 &AArch64::FPR32RegClass);
5488 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5489 .addReg(SrcReg, getKillRegState(KillSrc));
5490 }
5491 return;
5492 }
5493
5494 if (AArch64::FPR8RegClass.contains(DestReg) &&
5495 AArch64::FPR8RegClass.contains(SrcReg)) {
5496 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5497 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5498 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5499 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5500 &AArch64::FPR128RegClass);
5501 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5502 &AArch64::FPR128RegClass);
5503 // This instruction is reading and writing Q registers. This may upset
5504 // the register scavenger and machine verifier, so we need to indicate
5505 // that we are reading an undefined value from SrcRegQ, but a proper
5506 // value from SrcReg.
5507 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5508 .addReg(SrcRegQ, RegState::Undef)
5509 .addReg(SrcRegQ, RegState::Undef)
5510 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5511 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5512 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5513 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5514 &AArch64::FPR64RegClass);
5515 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5516 &AArch64::FPR64RegClass);
5517 // This instruction is reading and writing D registers. This may upset
5518 // the register scavenger and machine verifier, so we need to indicate
5519 // that we are reading an undefined value from SrcRegD, but a proper
5520 // value from SrcReg.
5521 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5522 .addReg(SrcRegD, RegState::Undef)
5523 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5524 } else {
5525 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5526 &AArch64::FPR32RegClass);
5527 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5528 &AArch64::FPR32RegClass);
5529 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5530 .addReg(SrcReg, getKillRegState(KillSrc));
5531 }
5532 return;
5533 }
5534
5535 // Copies between GPR64 and FPR64.
5536 if (AArch64::FPR64RegClass.contains(DestReg) &&
5537 AArch64::GPR64RegClass.contains(SrcReg)) {
5538 if (AArch64::XZR == SrcReg) {
5539 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5540 } else {
5541 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5542 .addReg(SrcReg, getKillRegState(KillSrc));
5543 }
5544 return;
5545 }
5546 if (AArch64::GPR64RegClass.contains(DestReg) &&
5547 AArch64::FPR64RegClass.contains(SrcReg)) {
5548 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5549 .addReg(SrcReg, getKillRegState(KillSrc));
5550 return;
5551 }
5552 // Copies between GPR32 and FPR32.
5553 if (AArch64::FPR32RegClass.contains(DestReg) &&
5554 AArch64::GPR32RegClass.contains(SrcReg)) {
5555 if (AArch64::WZR == SrcReg) {
5556 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5557 } else {
5558 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5559 .addReg(SrcReg, getKillRegState(KillSrc));
5560 }
5561 return;
5562 }
5563 if (AArch64::GPR32RegClass.contains(DestReg) &&
5564 AArch64::FPR32RegClass.contains(SrcReg)) {
5565 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5566 .addReg(SrcReg, getKillRegState(KillSrc));
5567 return;
5568 }
5569
5570 if (DestReg == AArch64::NZCV) {
5571 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5572 BuildMI(MBB, I, DL, get(AArch64::MSR))
5573 .addImm(AArch64SysReg::NZCV)
5574 .addReg(SrcReg, getKillRegState(KillSrc))
5575 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5576 return;
5577 }
5578
5579 if (SrcReg == AArch64::NZCV) {
5580 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5581 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5582 .addImm(AArch64SysReg::NZCV)
5583 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5584 return;
5585 }
5586
5587#ifndef NDEBUG
5588 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
5589 << "\n";
5590#endif
5591 llvm_unreachable("unimplemented reg-to-reg copy");
5592}
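// Illustrative example (assuming a subtarget with zero-cycle FPR128 moves but
// no zero-cycle FPR64/FPR32 moves, and NEON available): an FPR32 copy such as
// $s0 = COPY $s1 is widened to the containing Q registers and emitted as
//   $q0 = ORRv16i8 undef $q1, undef $q1, implicit killed $s1
// so the core can rename the whole vector register instead of moving data.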
5593
5594static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
5595 MachineBasicBlock &MBB,
5596 MachineBasicBlock::iterator InsertBefore,
5597 const MCInstrDesc &MCID,
5598 Register SrcReg, bool IsKill,
5599 unsigned SubIdx0, unsigned SubIdx1, int FI,
5600 MachineMemOperand *MMO) {
5601 Register SrcReg0 = SrcReg;
5602 Register SrcReg1 = SrcReg;
5603 if (SrcReg.isPhysical()) {
5604 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5605 SubIdx0 = 0;
5606 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5607 SubIdx1 = 0;
5608 }
5609 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5610 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5611 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5612 .addFrameIndex(FI)
5613 .addImm(0)
5614 .addMemOperand(MMO);
5615}
5616
5617void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
5618 MachineBasicBlock::iterator MBBI,
5619 Register SrcReg, bool isKill, int FI,
5620 const TargetRegisterClass *RC,
5621 const TargetRegisterInfo *TRI,
5622 Register VReg,
5623 MachineInstr::MIFlag Flags) const {
5624 MachineFunction &MF = *MBB.getParent();
5625 MachineFrameInfo &MFI = MF.getFrameInfo();
5626
5627 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5628 MachineMemOperand *MMO =
5629 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
5630 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5631 unsigned Opc = 0;
5632 bool Offset = true;
5633 Register PNRReg = MCRegister::NoRegister;
5634 unsigned StackID = TargetStackID::Default;
5635 switch (TRI->getSpillSize(*RC)) {
5636 case 1:
5637 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5638 Opc = AArch64::STRBui;
5639 break;
5640 case 2: {
5641 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5642 Opc = AArch64::STRHui;
5643 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5644 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5645 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5646 "Unexpected register store without SVE store instructions");
5647 Opc = AArch64::STR_PXI;
5649 }
5650 break;
5651 }
5652 case 4:
5653 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5654 Opc = AArch64::STRWui;
5655 if (SrcReg.isVirtual())
5656 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5657 else
5658 assert(SrcReg != AArch64::WSP);
5659 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5660 Opc = AArch64::STRSui;
5661 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5662 Opc = AArch64::STR_PPXI;
5664 }
5665 break;
5666 case 8:
5667 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5668 Opc = AArch64::STRXui;
5669 if (SrcReg.isVirtual())
5670 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5671 else
5672 assert(SrcReg != AArch64::SP);
5673 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5674 Opc = AArch64::STRDui;
5675 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5676 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
5677 get(AArch64::STPWi), SrcReg, isKill,
5678 AArch64::sube32, AArch64::subo32, FI, MMO);
5679 return;
5680 }
5681 break;
5682 case 16:
5683 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5684 Opc = AArch64::STRQui;
5685 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5686 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5687 Opc = AArch64::ST1Twov1d;
5688 Offset = false;
5689 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5690 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
5691 get(AArch64::STPXi), SrcReg, isKill,
5692 AArch64::sube64, AArch64::subo64, FI, MMO);
5693 return;
5694 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5695 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5696 "Unexpected register store without SVE store instructions");
5697 Opc = AArch64::STR_ZXI;
5699 }
5700 break;
5701 case 24:
5702 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5703 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5704 Opc = AArch64::ST1Threev1d;
5705 Offset = false;
5706 }
5707 break;
5708 case 32:
5709 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5710 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5711 Opc = AArch64::ST1Fourv1d;
5712 Offset = false;
5713 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5714 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5715 Opc = AArch64::ST1Twov2d;
5716 Offset = false;
5717 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5718 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5719 "Unexpected register store without SVE store instructions");
5720 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5722 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5723 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5724 "Unexpected register store without SVE store instructions");
5725 Opc = AArch64::STR_ZZXI;
5727 }
5728 break;
5729 case 48:
5730 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5731 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5732 Opc = AArch64::ST1Threev2d;
5733 Offset = false;
5734 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5735 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5736 "Unexpected register store without SVE store instructions");
5737 Opc = AArch64::STR_ZZZXI;
5739 }
5740 break;
5741 case 64:
5742 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5743 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5744 Opc = AArch64::ST1Fourv2d;
5745 Offset = false;
5746 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5747 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5748 "Unexpected register store without SVE store instructions");
5749 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5751 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5752 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5753 "Unexpected register store without SVE store instructions");
5754 Opc = AArch64::STR_ZZZZXI;
5756 }
5757 break;
5758 }
5759 assert(Opc && "Unknown register class");
5760 MFI.setStackID(FI, StackID);
5761
5762 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5763 .addReg(SrcReg, getKillRegState(isKill))
5764 .addFrameIndex(FI);
5765
5766 if (Offset)
5767 MI.addImm(0);
5768 if (PNRReg.isValid())
5769 MI.addDef(PNRReg, RegState::Implicit);
5770 MI.addMemOperand(MMO);
5771}
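// Illustrative example: spilling a virtual GPR64 register to frame index 0
// takes the 8-byte case above, selects Opc = STRXui, and emits roughly
//   STRXui %0, %stack.0, 0 :: (store into %stack.0)
// while an FPR128 spill of the same kind would select STRQui instead.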
5772
5773static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
5774 MachineBasicBlock &MBB,
5775 MachineBasicBlock::iterator InsertBefore,
5776 const MCInstrDesc &MCID,
5777 Register DestReg, unsigned SubIdx0,
5778 unsigned SubIdx1, int FI,
5779 MachineMemOperand *MMO) {
5780 Register DestReg0 = DestReg;
5781 Register DestReg1 = DestReg;
5782 bool IsUndef = true;
5783 if (DestReg.isPhysical()) {
5784 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5785 SubIdx0 = 0;
5786 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5787 SubIdx1 = 0;
5788 IsUndef = false;
5789 }
5790 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5791 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5792 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5793 .addFrameIndex(FI)
5794 .addImm(0)
5795 .addMemOperand(MMO);
5796}
5797
5798void AArch64InstrInfo::loadRegFromStackSlot(
5799 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
5800 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5801 Register VReg, MachineInstr::MIFlag Flags) const {
5802 MachineFunction &MF = *MBB.getParent();
5803 MachineFrameInfo &MFI = MF.getFrameInfo();
5804 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5805 MachineMemOperand *MMO =
5806 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
5807 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5808
5809 unsigned Opc = 0;
5810 bool Offset = true;
5811 unsigned StackID = TargetStackID::Default;
5812 Register PNRReg = MCRegister::NoRegister;
5813 switch (TRI->getSpillSize(*RC)) {
5814 case 1:
5815 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5816 Opc = AArch64::LDRBui;
5817 break;
5818 case 2: {
5819 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5820 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5821 Opc = AArch64::LDRHui;
5822 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5823 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5824 "Unexpected register load without SVE load instructions");
5825 if (IsPNR)
5826 PNRReg = DestReg;
5827 Opc = AArch64::LDR_PXI;
5829 }
5830 break;
5831 }
5832 case 4:
5833 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5834 Opc = AArch64::LDRWui;
5835 if (DestReg.isVirtual())
5836 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5837 else
5838 assert(DestReg != AArch64::WSP);
5839 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5840 Opc = AArch64::LDRSui;
5841 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5842 Opc = AArch64::LDR_PPXI;
5844 }
5845 break;
5846 case 8:
5847 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5848 Opc = AArch64::LDRXui;
5849 if (DestReg.isVirtual())
5850 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5851 else
5852 assert(DestReg != AArch64::SP);
5853 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5854 Opc = AArch64::LDRDui;
5855 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5856 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
5857 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5858 AArch64::subo32, FI, MMO);
5859 return;
5860 }
5861 break;
5862 case 16:
5863 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5864 Opc = AArch64::LDRQui;
5865 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5866 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5867 Opc = AArch64::LD1Twov1d;
5868 Offset = false;
5869 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5870 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
5871 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5872 AArch64::subo64, FI, MMO);
5873 return;
5874 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5875 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5876 "Unexpected register load without SVE load instructions");
5877 Opc = AArch64::LDR_ZXI;
5879 }
5880 break;
5881 case 24:
5882 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5883 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5884 Opc = AArch64::LD1Threev1d;
5885 Offset = false;
5886 }
5887 break;
5888 case 32:
5889 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5890 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5891 Opc = AArch64::LD1Fourv1d;
5892 Offset = false;
5893 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5894 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5895 Opc = AArch64::LD1Twov2d;
5896 Offset = false;
5897 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5898 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5899 "Unexpected register load without SVE load instructions");
5900 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5902 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5903 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5904 "Unexpected register load without SVE load instructions");
5905 Opc = AArch64::LDR_ZZXI;
5907 }
5908 break;
5909 case 48:
5910 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5911 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5912 Opc = AArch64::LD1Threev2d;
5913 Offset = false;
5914 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5915 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5916 "Unexpected register load without SVE load instructions");
5917 Opc = AArch64::LDR_ZZZXI;
5919 }
5920 break;
5921 case 64:
5922 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5923 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5924 Opc = AArch64::LD1Fourv2d;
5925 Offset = false;
5926 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5927 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5928 "Unexpected register load without SVE load instructions");
5929 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5931 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5932 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5933 "Unexpected register load without SVE load instructions");
5934 Opc = AArch64::LDR_ZZZZXI;
5936 }
5937 break;
5938 }
5939
5940 assert(Opc && "Unknown register class");
5941 MFI.setStackID(FI, StackID);
5942
5943 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5944 .addReg(DestReg, getDefRegState(true))
5945 .addFrameIndex(FI);
5946 if (Offset)
5947 MI.addImm(0);
5948 if (PNRReg.isValid() && !PNRReg.isVirtual())
5949 MI.addDef(PNRReg, RegState::Implicit);
5950 MI.addMemOperand(MMO);
5951}
5952
5953bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
5954 const MachineInstr &UseMI,
5955 const TargetRegisterInfo *TRI) {
5956 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5957 UseMI.getIterator()),
5958 [TRI](const MachineInstr &I) {
5959 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5960 I.readsRegister(AArch64::NZCV, TRI);
5961 });
5962}
5963
5964void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5965 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5966 // The smallest scalable elements supported by scaled SVE addressing
5967 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5968 // byte offset must always be a multiple of 2.
5969 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5970
5971 // VGSized offsets are divided by '2', because the VG register is the
5972 // number of 64-bit granules as opposed to 128-bit vector chunks,
5973 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5974 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5975 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5976 ByteSized = Offset.getFixed();
5977 VGSized = Offset.getScalable() / 2;
5978}
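// Worked example: StackOffset::get(/*Fixed=*/16, /*Scalable=*/32) decomposes
// into ByteSized = 16 and VGSized = 16, i.e. the DWARF offset is expressed as
// 16 + 16 * VG, with VG counting 64-bit granules.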
5979
5980/// Decomposes the given frame offset into the parts (fixed bytes, predicate
5981/// vector increments and data vector increments) needed to materialise it.
5982/// For non-scalable offsets this is simply its byte size.
5983void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5984 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5985 int64_t &NumDataVectors) {
5986 // The smallest scalable elements supported by scaled SVE addressing
5987 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5988 // byte offset must always be a multiple of 2.
5989 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5990
5991 NumBytes = Offset.getFixed();
5992 NumDataVectors = 0;
5993 NumPredicateVectors = Offset.getScalable() / 2;
5994 // This method is used to get the offsets to adjust the frame offset.
5995 // If the function requires ADDPL to be used and needs more than two ADDPL
5996 // instructions, part of the offset is folded into NumDataVectors so that it
5997 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5998 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5999 NumPredicateVectors > 62) {
6000 NumDataVectors = NumPredicateVectors / 8;
6001 NumPredicateVectors -= NumDataVectors * 8;
6002 }
6003}
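// Worked example: a scalable offset of 130 bytes gives NumPredicateVectors =
// 65 at first; since 65 > 62 it is partially folded, leaving NumDataVectors =
// 8 (one ADDVL by 8) and NumPredicateVectors = 1 (one ADDPL by 1).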
6004
6005// Convenience function to create a DWARF expression for: Constant `Operation`.
6006// This helper emits compact sequences for common cases. For example, for
6007// `-15 DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
6008static void appendConstantExpr(SmallVectorImpl<char> &Expr, int64_t Constant,
6009 dwarf::LocationAtom Operation) {
6010 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6011 // -Constant (1 to 31)
6012 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6013 Operation = dwarf::DW_OP_minus;
6014 } else if (Constant >= 0 && Constant <= 31) {
6015 // Literal value 0 to 31
6016 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6017 } else {
6018 // Signed constant
6019 Expr.push_back(dwarf::DW_OP_consts);
6020 appendLEB128<LEB128Sign::Signed>(Expr, Constant);
6021 }
6022 return Expr.push_back(Operation);
6023}
6024
6025// Convenience function to create a DWARF expression for a register.
6026static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6027 Expr.push_back((char)dwarf::DW_OP_bregx);
6028 appendLEB128<LEB128Sign::Unsigned>(Expr, RegNum);
6029 Expr.push_back(0);
6030}
6031
6032// Convenience function to create a DWARF expression for loading a register from
6033// a CFA offset.
6034static void appendLoadRegExpr(SmallVectorImpl<char> &Expr,
6035 int64_t OffsetFromDefCFA) {
6036 // This assumes the top of the DWARF stack contains the CFA.
6037 Expr.push_back(dwarf::DW_OP_dup);
6038 // Add the offset to the register.
6039 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6040 // Dereference the address (loads a 64-bit value).
6041 Expr.push_back(dwarf::DW_OP_deref);
6042}
6043
6044// Convenience function to create a comment for
6045// (+/-) NumBytes (* RegScale)?
6046static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6047 StringRef RegScale = {}) {
6048 if (NumBytes) {
6049 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6050 if (!RegScale.empty())
6051 Comment << ' ' << RegScale;
6052 }
6053}
6054
6055// Creates an MCCFIInstruction:
6056// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6057static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
6058 unsigned Reg,
6059 const StackOffset &Offset) {
6060 int64_t NumBytes, NumVGScaledBytes;
6061 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6062 NumVGScaledBytes);
6063 std::string CommentBuffer;
6064 llvm::raw_string_ostream Comment(CommentBuffer);
6065
6066 if (Reg == AArch64::SP)
6067 Comment << "sp";
6068 else if (Reg == AArch64::FP)
6069 Comment << "fp";
6070 else
6071 Comment << printReg(Reg, &TRI);
6072
6073 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6074 SmallString<64> Expr;
6075 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6076 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6077 // Reg + NumBytes
6078 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6079 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6080 appendOffsetComment(NumBytes, Comment);
6081 if (NumVGScaledBytes) {
6082 // + VG * NumVGScaledBytes
6083 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6084 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6085 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6086 Expr.push_back(dwarf::DW_OP_plus);
6087 }
6088
6089 // Wrap this into DW_CFA_def_cfa.
6090 SmallString<64> DefCfaExpr;
6091 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6092 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6093 DefCfaExpr.append(Expr.str());
6094 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6095 Comment.str());
6096}
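// Illustrative example: for Reg == AArch64::SP and StackOffset::get(16, 16)
// the expression built above is roughly
//   DW_OP_breg31 +16, DW_OP_bregx VG 0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
// wrapped in DW_CFA_def_cfa_expression and annotated with "sp + 16 + 8 * VG".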
6097
6098MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
6099 unsigned FrameReg, unsigned Reg,
6100 const StackOffset &Offset,
6101 bool LastAdjustmentWasScalable) {
6102 if (Offset.getScalable())
6103 return createDefCFAExpression(TRI, Reg, Offset);
6104
6105 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6106 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6107
6108 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6109 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6110}
6111
6112MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
6113 unsigned Reg,
6114 const StackOffset &OffsetFromDefCFA,
6115 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6116 int64_t NumBytes, NumVGScaledBytes;
6117 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6118 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6119
6120 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6121
6122 // Non-scalable offsets can use DW_CFA_offset directly.
6123 if (!NumVGScaledBytes)
6124 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6125
6126 std::string CommentBuffer;
6127 llvm::raw_string_ostream Comment(CommentBuffer);
6128 Comment << printReg(Reg, &TRI) << " @ cfa";
6129
6130 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6131 assert(NumVGScaledBytes && "Expected scalable offset");
6132 SmallString<64> OffsetExpr;
6133 // + VG * NumVGScaledBytes
6134 StringRef VGRegScale;
6135 if (IncomingVGOffsetFromDefCFA) {
6136 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6137 VGRegScale = "* IncomingVG";
6138 } else {
6139 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6140 VGRegScale = "* VG";
6141 }
6142 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6143 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6144 OffsetExpr.push_back(dwarf::DW_OP_plus);
6145 if (NumBytes) {
6146 // + NumBytes
6147 appendOffsetComment(NumBytes, Comment);
6148 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6149 }
6150
6151 // Wrap this into DW_CFA_expression
6152 SmallString<64> CfaExpr;
6153 CfaExpr.push_back(dwarf::DW_CFA_expression);
6154 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6155 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6156 CfaExpr.append(OffsetExpr.str());
6157
6158 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6159 Comment.str());
6160}
6161
6162// Helper function to emit a frame offset adjustment from a given
6163// pointer (SrcReg), stored into DestReg. This function is explicit
6164// in that it requires the opcode.
6165static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
6166 MachineBasicBlock::iterator MBBI,
6167 const DebugLoc &DL, unsigned DestReg,
6168 unsigned SrcReg, int64_t Offset, unsigned Opc,
6169 const TargetInstrInfo *TII,
6170 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6171 bool *HasWinCFI, bool EmitCFAOffset,
6172 StackOffset CFAOffset, unsigned FrameReg) {
6173 int Sign = 1;
6174 unsigned MaxEncoding, ShiftSize;
6175 switch (Opc) {
6176 case AArch64::ADDXri:
6177 case AArch64::ADDSXri:
6178 case AArch64::SUBXri:
6179 case AArch64::SUBSXri:
6180 MaxEncoding = 0xfff;
6181 ShiftSize = 12;
6182 break;
6183 case AArch64::ADDVL_XXI:
6184 case AArch64::ADDPL_XXI:
6185 case AArch64::ADDSVL_XXI:
6186 case AArch64::ADDSPL_XXI:
6187 MaxEncoding = 31;
6188 ShiftSize = 0;
6189 if (Offset < 0) {
6190 MaxEncoding = 32;
6191 Sign = -1;
6192 Offset = -Offset;
6193 }
6194 break;
6195 default:
6196 llvm_unreachable("Unsupported opcode");
6197 }
6198
6199 // `Offset` can be in bytes or in "scalable bytes".
6200 int VScale = 1;
6201 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6202 VScale = 16;
6203 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6204 VScale = 2;
6205
6206 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6207 // scratch register. If DestReg is a virtual register, use it as the
6208 // scratch register; otherwise, create a new virtual register (to be
6209 // replaced by the scavenger at the end of PEI). That case can be optimized
6210 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6211 // register can be loaded with offset%8 and the add/sub can use an extending
6212 // instruction with LSL#3.
6213 // Currently the function handles any offsets but generates a poor sequence
6214 // of code.
6215 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6216
6217 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6218 Register TmpReg = DestReg;
6219 if (TmpReg == AArch64::XZR)
6220 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6221 &AArch64::GPR64RegClass);
6222 do {
6223 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6224 unsigned LocalShiftSize = 0;
6225 if (ThisVal > MaxEncoding) {
6226 ThisVal = ThisVal >> ShiftSize;
6227 LocalShiftSize = ShiftSize;
6228 }
6229 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6230 "Encoding cannot handle value that big");
6231
6232 Offset -= ThisVal << LocalShiftSize;
6233 if (Offset == 0)
6234 TmpReg = DestReg;
6235 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6236 .addReg(SrcReg)
6237 .addImm(Sign * (int)ThisVal);
6238 if (ShiftSize)
6239 MBI = MBI.addImm(
6240 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
6241 MBI = MBI.setMIFlag(Flag);
6242
6243 auto Change =
6244 VScale == 1
6245 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6246 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6247 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6248 CFAOffset += Change;
6249 else
6250 CFAOffset -= Change;
6251 if (EmitCFAOffset && DestReg == TmpReg) {
6252 MachineFunction &MF = *MBB.getParent();
6253 const TargetSubtargetInfo &STI = MF.getSubtarget();
6254 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6255
6256 unsigned CFIIndex = MF.addFrameInst(
6257 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6258 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6259 .addCFIIndex(CFIIndex)
6260 .setMIFlags(Flag);
6261 }
6262
6263 if (NeedsWinCFI) {
6264 int Imm = (int)(ThisVal << LocalShiftSize);
6265 if (VScale != 1 && DestReg == AArch64::SP) {
6266 if (HasWinCFI)
6267 *HasWinCFI = true;
6268 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6269 .addImm(ThisVal)
6270 .setMIFlag(Flag);
6271 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6272 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6273 assert(VScale == 1 && "Expected non-scalable operation");
6274 if (HasWinCFI)
6275 *HasWinCFI = true;
6276 if (Imm == 0)
6277 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6278 else
6279 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6280 .addImm(Imm)
6281 .setMIFlag(Flag);
6282 assert(Offset == 0 && "Expected remaining offset to be zero to "
6283 "emit a single SEH directive");
6284 } else if (DestReg == AArch64::SP) {
6285 assert(VScale == 1 && "Expected non-scalable operation");
6286 if (HasWinCFI)
6287 *HasWinCFI = true;
6288 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6289 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6290 .addImm(Imm)
6291 .setMIFlag(Flag);
6292 }
6293 }
6294
6295 SrcReg = TmpReg;
6296 } while (Offset);
6297}
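// Worked example: for Opc == ADDXri and Offset == 0x101008 the loop runs
// twice: the first iteration emits ADD Dst, Src, #0x101, lsl #12 (covering
// 0x101000) and the second ADD Dst, Dst, #8, because each ADD/SUB can only
// encode a 12-bit immediate, optionally shifted left by 12.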
6298
6299void llvm::emitFrameOffset(MachineBasicBlock &MBB,
6300 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
6301 unsigned DestReg, unsigned SrcReg,
6302 StackOffset Offset, const TargetInstrInfo *TII,
6303 MachineInstr::MIFlag Flag, bool SetNZCV,
6304 bool NeedsWinCFI, bool *HasWinCFI,
6305 bool EmitCFAOffset, StackOffset CFAOffset,
6306 unsigned FrameReg) {
6307 // If a function is marked as arm_locally_streaming, then the runtime value of
6308 // vscale in the prologue/epilogue is different from the runtime value of vscale
6309 // in the function's body. To avoid having to consider multiple vscales,
6310 // we can use `addsvl` to allocate any scalable stack-slots, which under
6311 // most circumstances will be only locals, not callee-save slots.
6312 const Function &F = MBB.getParent()->getFunction();
6313 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6314
6315 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6316 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6317 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6318
6319 // If NZCV must be set and the offset has a scalable part, emit the flag-setting ADDSXri last.
6320 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6321 if (NeedsFinalDefNZCV)
6322 SetNZCV = false;
6323
6324 // First emit non-scalable frame offsets, or a simple 'mov'.
6325 if (Bytes || (!Offset && SrcReg != DestReg)) {
6326 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6327 "SP increment/decrement not 8-byte aligned");
6328 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6329 if (Bytes < 0) {
6330 Bytes = -Bytes;
6331 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6332 }
6333 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6334 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6335 FrameReg);
6336 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6337 ? StackOffset::getFixed(-Bytes)
6338 : StackOffset::getFixed(Bytes);
6339 SrcReg = DestReg;
6340 FrameReg = DestReg;
6341 }
6342
6343 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6344 "WinCFI can't allocate fractions of an SVE data vector");
6345
6346 if (NumDataVectors) {
6347 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6348 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6349 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6350 FrameReg);
6351 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6352 SrcReg = DestReg;
6353 }
6354
6355 if (NumPredicateVectors) {
6356 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6357 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6358 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6359 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6360 FrameReg);
6361 }
6362
6363 if (NeedsFinalDefNZCV)
6364 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6365 .addReg(DestReg)
6366 .addImm(0)
6367 .addImm(0);
6368}
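// Illustrative usage sketch (MBB, MBBI, DL and TII assumed to be in scope):
// allocating 32 fixed bytes plus one SVE data vector in a prologue, e.g.
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset::get(-32, -16), TII, MachineInstr::FrameSetup);
// decomposes into Bytes = -32 and NumDataVectors = -1 and expands to
//   sub sp, sp, #32
//   addvl sp, sp, #-1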
6369
6370MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
6371 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
6372 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6373 LiveIntervals *LIS, VirtRegMap *VRM) const {
6374 // This is a bit of a hack. Consider this instruction:
6375 //
6376 // %0 = COPY %sp; GPR64all:%0
6377 //
6378 // We explicitly chose GPR64all for the virtual register so such a copy might
6379 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6380 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6381 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6382 //
6383 // To prevent that, we are going to constrain the %0 register class here.
6384 if (MI.isFullCopy()) {
6385 Register DstReg = MI.getOperand(0).getReg();
6386 Register SrcReg = MI.getOperand(1).getReg();
6387 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6388 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6389 return nullptr;
6390 }
6391 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6392 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6393 return nullptr;
6394 }
6395 // Nothing can be folded with a copy from/to NZCV.
6396 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6397 return nullptr;
6398 }
6399
6400 // Handle the case where a copy is being spilled or filled but the source
6401 // and destination register class don't match. For example:
6402 //
6403 // %0 = COPY %xzr; GPR64common:%0
6404 //
6405 // In this case we can still safely fold away the COPY and generate the
6406 // following spill code:
6407 //
6408 // STRXui %xzr, %stack.0
6409 //
6410 // This also eliminates spilled cross register class COPYs (e.g. between x and
6411 // d regs) of the same size. For example:
6412 //
6413 // %0 = COPY %1; GPR64:%0, FPR64:%1
6414 //
6415 // will be filled as
6416 //
6417 // LDRDui %0, fi<#0>
6418 //
6419 // instead of
6420 //
6421 // LDRXui %Temp, fi<#0>
6422 // %0 = FMOV %Temp
6423 //
6424 if (MI.isCopy() && Ops.size() == 1 &&
6425 // Make sure we're only folding the explicit COPY defs/uses.
6426 (Ops[0] == 0 || Ops[0] == 1)) {
6427 bool IsSpill = Ops[0] == 0;
6428 bool IsFill = !IsSpill;
6429 const TargetRegisterInfo &TRI = getRegisterInfo();
6430 const MachineRegisterInfo &MRI = MF.getRegInfo();
6431 MachineBasicBlock &MBB = *MI.getParent();
6432 const MachineOperand &DstMO = MI.getOperand(0);
6433 const MachineOperand &SrcMO = MI.getOperand(1);
6434 Register DstReg = DstMO.getReg();
6435 Register SrcReg = SrcMO.getReg();
6436 // This is slightly expensive to compute for physical regs since
6437 // getMinimalPhysRegClass is slow.
6438 auto getRegClass = [&](unsigned Reg) {
6439 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6440 : TRI.getMinimalPhysRegClass(Reg);
6441 };
6442
6443 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6444 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6445 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6446 "Mismatched register size in non subreg COPY");
6447 if (IsSpill)
6448 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6449 getRegClass(SrcReg), &TRI, Register());
6450 else
6451 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6452 getRegClass(DstReg), &TRI, Register());
6453 return &*--InsertPt;
6454 }
6455
6456 // Handle cases like spilling def of:
6457 //
6458 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6459 //
6460 // where the physical register source can be widened and stored to the full
6461 // virtual reg destination stack slot, in this case producing:
6462 //
6463 // STRXui %xzr, %stack.0
6464 //
6465 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6466 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6467 assert(SrcMO.getSubReg() == 0 &&
6468 "Unexpected subreg on physical register");
6469 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6470 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6471 Register());
6472 return &*--InsertPt;
6473 }
6474
6475 // Handle cases like filling use of:
6476 //
6477 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6478 //
6479 // where we can load the full virtual reg source stack slot, into the subreg
6480 // destination, in this case producing:
6481 //
6482 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6483 //
6484 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6485 const TargetRegisterClass *FillRC = nullptr;
6486 switch (DstMO.getSubReg()) {
6487 default:
6488 break;
6489 case AArch64::sub_32:
6490 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6491 FillRC = &AArch64::GPR32RegClass;
6492 break;
6493 case AArch64::ssub:
6494 FillRC = &AArch64::FPR32RegClass;
6495 break;
6496 case AArch64::dsub:
6497 FillRC = &AArch64::FPR64RegClass;
6498 break;
6499 }
6500
6501 if (FillRC) {
6502 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6503 TRI.getRegSizeInBits(*FillRC) &&
6504 "Mismatched regclass size on folded subreg COPY");
6505 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6506 Register());
6507 MachineInstr &LoadMI = *--InsertPt;
6508 MachineOperand &LoadDst = LoadMI.getOperand(0);
6509 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6510 LoadDst.setSubReg(DstMO.getSubReg());
6511 LoadDst.setIsUndef();
6512 return &LoadMI;
6513 }
6514 }
6515 }
6516
6517 // Cannot fold.
6518 return nullptr;
6519}
6520
6521int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
6522 StackOffset &SOffset,
6523 bool *OutUseUnscaledOp,
6524 unsigned *OutUnscaledOp,
6525 int64_t *EmittableOffset) {
6526 // Set output values in case of early exit.
6527 if (EmittableOffset)
6528 *EmittableOffset = 0;
6529 if (OutUseUnscaledOp)
6530 *OutUseUnscaledOp = false;
6531 if (OutUnscaledOp)
6532 *OutUnscaledOp = 0;
6533
6534 // Exit early for structured vector spills/fills as they can't take an
6535 // immediate offset.
6536 switch (MI.getOpcode()) {
6537 default:
6538 break;
6539 case AArch64::LD1Rv1d:
6540 case AArch64::LD1Rv2s:
6541 case AArch64::LD1Rv2d:
6542 case AArch64::LD1Rv4h:
6543 case AArch64::LD1Rv4s:
6544 case AArch64::LD1Rv8b:
6545 case AArch64::LD1Rv8h:
6546 case AArch64::LD1Rv16b:
6547 case AArch64::LD1Twov2d:
6548 case AArch64::LD1Threev2d:
6549 case AArch64::LD1Fourv2d:
6550 case AArch64::LD1Twov1d:
6551 case AArch64::LD1Threev1d:
6552 case AArch64::LD1Fourv1d:
6553 case AArch64::ST1Twov2d:
6554 case AArch64::ST1Threev2d:
6555 case AArch64::ST1Fourv2d:
6556 case AArch64::ST1Twov1d:
6557 case AArch64::ST1Threev1d:
6558 case AArch64::ST1Fourv1d:
6559 case AArch64::ST1i8:
6560 case AArch64::ST1i16:
6561 case AArch64::ST1i32:
6562 case AArch64::ST1i64:
6563 case AArch64::IRG:
6564 case AArch64::IRGstack:
6565 case AArch64::STGloop:
6566 case AArch64::STZGloop:
6567 return AArch64FrameOffsetCannotUpdate;
6568 }
6569
6570 // Get the min/max offset and the scale.
6571 TypeSize ScaleValue(0U, false), Width(0U, false);
6572 int64_t MinOff, MaxOff;
6573 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6574 MaxOff))
6575 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6576
6577 // Construct the complete offset.
6578 bool IsMulVL = ScaleValue.isScalable();
6579 unsigned Scale = ScaleValue.getKnownMinValue();
6580 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6581
6582 const MachineOperand &ImmOpnd =
6583 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6584 Offset += ImmOpnd.getImm() * Scale;
6585
6586 // If the offset doesn't match the scale, we rewrite the instruction to
6587 // use the unscaled instruction instead. Likewise, if we have a negative
6588 // offset and there is an unscaled op to use.
6589 std::optional<unsigned> UnscaledOp =
6590 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
6591 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6592 if (useUnscaledOp &&
6593 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6594 MaxOff))
6595 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6596
6597 Scale = ScaleValue.getKnownMinValue();
6598 assert(IsMulVL == ScaleValue.isScalable() &&
6599 "Unscaled opcode has different value for scalable");
6600
6601 int64_t Remainder = Offset % Scale;
6602 assert(!(Remainder && useUnscaledOp) &&
6603 "Cannot have remainder when using unscaled op");
6604
6605 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6606 int64_t NewOffset = Offset / Scale;
6607 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6608 Offset = Remainder;
6609 else {
6610 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6611 Offset = Offset - (NewOffset * Scale);
6612 }
6613
6614 if (EmittableOffset)
6615 *EmittableOffset = NewOffset;
6616 if (OutUseUnscaledOp)
6617 *OutUseUnscaledOp = useUnscaledOp;
6618 if (OutUnscaledOp && UnscaledOp)
6619 *OutUnscaledOp = *UnscaledOp;
6620
6621 if (IsMulVL)
6622 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6623 else
6624 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6625 return AArch64FrameOffsetCanUpdate |
6626 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6627}
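// Illustrative example: an LDRXui whose combined byte offset is 10 cannot use
// the scaled form (10 is not a multiple of its scale of 8), so the unscaled
// LDURXi variant is reported instead, with an emittable offset of 10 and no
// remaining offset, making the access legal as-is.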
6628
6629bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
6630 unsigned FrameReg, StackOffset &Offset,
6631 const AArch64InstrInfo *TII) {
6632 unsigned Opcode = MI.getOpcode();
6633 unsigned ImmIdx = FrameRegIdx + 1;
6634
6635 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6636 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6637 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6638 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6639 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6640 MI.eraseFromParent();
6641 Offset = StackOffset();
6642 return true;
6643 }
6644
6645 int64_t NewOffset;
6646 unsigned UnscaledOp;
6647 bool UseUnscaledOp;
6648 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6649 &UnscaledOp, &NewOffset);
6650 if (Status & AArch64FrameOffsetCanUpdate) {
6651 if (Status & AArch64FrameOffsetIsLegal)
6652 // Replace the FrameIndex with FrameReg.
6653 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6654 if (UseUnscaledOp)
6655 MI.setDesc(TII->get(UnscaledOp));
6656
6657 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6658 return !Offset;
6659 }
6660
6661 return false;
6662}
6663
6669
6670MCInst AArch64InstrInfo::getNop() const {
6671 return MCInstBuilder(AArch64::HINT).addImm(0);
6672}
6673
6674// AArch64 supports MachineCombiner.
6675bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6676
6677// True when Opc sets flag
6678static bool isCombineInstrSettingFlag(unsigned Opc) {
6679 switch (Opc) {
6680 case AArch64::ADDSWrr:
6681 case AArch64::ADDSWri:
6682 case AArch64::ADDSXrr:
6683 case AArch64::ADDSXri:
6684 case AArch64::SUBSWrr:
6685 case AArch64::SUBSXrr:
6686 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6687 case AArch64::SUBSWri:
6688 case AArch64::SUBSXri:
6689 return true;
6690 default:
6691 break;
6692 }
6693 return false;
6694}
6695
6696// 32b Opcodes that can be combined with a MUL
6697static bool isCombineInstrCandidate32(unsigned Opc) {
6698 switch (Opc) {
6699 case AArch64::ADDWrr:
6700 case AArch64::ADDWri:
6701 case AArch64::SUBWrr:
6702 case AArch64::ADDSWrr:
6703 case AArch64::ADDSWri:
6704 case AArch64::SUBSWrr:
6705 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6706 case AArch64::SUBWri:
6707 case AArch64::SUBSWri:
6708 return true;
6709 default:
6710 break;
6711 }
6712 return false;
6713}
6714
6715// 64b Opcodes that can be combined with a MUL
6716static bool isCombineInstrCandidate64(unsigned Opc) {
6717 switch (Opc) {
6718 case AArch64::ADDXrr:
6719 case AArch64::ADDXri:
6720 case AArch64::SUBXrr:
6721 case AArch64::ADDSXrr:
6722 case AArch64::ADDSXri:
6723 case AArch64::SUBSXrr:
6724 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6725 case AArch64::SUBXri:
6726 case AArch64::SUBSXri:
6727 case AArch64::ADDv8i8:
6728 case AArch64::ADDv16i8:
6729 case AArch64::ADDv4i16:
6730 case AArch64::ADDv8i16:
6731 case AArch64::ADDv2i32:
6732 case AArch64::ADDv4i32:
6733 case AArch64::SUBv8i8:
6734 case AArch64::SUBv16i8:
6735 case AArch64::SUBv4i16:
6736 case AArch64::SUBv8i16:
6737 case AArch64::SUBv2i32:
6738 case AArch64::SUBv4i32:
6739 return true;
6740 default:
6741 break;
6742 }
6743 return false;
6744}
6745
6746// FP Opcodes that can be combined with a FMUL.
6747static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6748 switch (Inst.getOpcode()) {
6749 default:
6750 break;
6751 case AArch64::FADDHrr:
6752 case AArch64::FADDSrr:
6753 case AArch64::FADDDrr:
6754 case AArch64::FADDv4f16:
6755 case AArch64::FADDv8f16:
6756 case AArch64::FADDv2f32:
6757 case AArch64::FADDv2f64:
6758 case AArch64::FADDv4f32:
6759 case AArch64::FSUBHrr:
6760 case AArch64::FSUBSrr:
6761 case AArch64::FSUBDrr:
6762 case AArch64::FSUBv4f16:
6763 case AArch64::FSUBv8f16:
6764 case AArch64::FSUBv2f32:
6765 case AArch64::FSUBv2f64:
6766 case AArch64::FSUBv4f32:
6767 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
6768 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6769 // the target options or if FADD/FSUB has the contract fast-math flag.
6770 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
6771 Inst.getFlag(MachineInstr::FmContract);
6772 }
6773 return false;
6774}
6775
6776// Opcodes that can be combined with a MUL
6777static bool isCombineInstrCandidate(unsigned Opc) {
6778 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
6779}
6780
6781//
6782// Utility routine that checks if \param MO is defined by an
6783// \param CombineOpc instruction in the basic block \param MBB
6784static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
6785 unsigned CombineOpc, unsigned ZeroReg = 0,
6786 bool CheckZeroReg = false) {
6787 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6788 MachineInstr *MI = nullptr;
6789
6790 if (MO.isReg() && MO.getReg().isVirtual())
6791 MI = MRI.getUniqueVRegDef(MO.getReg());
6792 // And it needs to be in the trace (otherwise, it won't have a depth).
6793 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
6794 return false;
6795 // Must only be used by the user we combine with.
6796 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6797 return false;
6798
6799 if (CheckZeroReg) {
6800 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6801 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6802 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6803 // The third input reg must be zero.
6804 if (MI->getOperand(3).getReg() != ZeroReg)
6805 return false;
6806 }
6807
6808 if (isCombineInstrSettingFlag(CombineOpc) &&
6809 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6810 return false;
6811
6812 return true;
6813}
6814
6815//
6816// Is \param MO defined by an integer multiply and can be combined?
6817static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
6818 unsigned MulOpc, unsigned ZeroReg) {
6819 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6820}
6821
6822//
6823// Is \param MO defined by a floating-point multiply and can be combined?
6824static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
6825 unsigned MulOpc) {
6826 return canCombine(MBB, MO, MulOpc);
6827}
6828
6829// TODO: There are many more machine instruction opcodes to match:
6830// 1. Other data types (integer, vectors)
6831// 2. Other math / logic operations (xor, or)
6832// 3. Other forms of the same operation (intrinsics and other variants)
6833bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6834 bool Invert) const {
6835 if (Invert)
6836 return false;
6837 switch (Inst.getOpcode()) {
6838 // == Floating-point types ==
6839 // -- Floating-point instructions --
6840 case AArch64::FADDHrr:
6841 case AArch64::FADDSrr:
6842 case AArch64::FADDDrr:
6843 case AArch64::FMULHrr:
6844 case AArch64::FMULSrr:
6845 case AArch64::FMULDrr:
6846 case AArch64::FMULX16:
6847 case AArch64::FMULX32:
6848 case AArch64::FMULX64:
6849 // -- Advanced SIMD instructions --
6850 case AArch64::FADDv4f16:
6851 case AArch64::FADDv8f16:
6852 case AArch64::FADDv2f32:
6853 case AArch64::FADDv4f32:
6854 case AArch64::FADDv2f64:
6855 case AArch64::FMULv4f16:
6856 case AArch64::FMULv8f16:
6857 case AArch64::FMULv2f32:
6858 case AArch64::FMULv4f32:
6859 case AArch64::FMULv2f64:
6860 case AArch64::FMULXv4f16:
6861 case AArch64::FMULXv8f16:
6862 case AArch64::FMULXv2f32:
6863 case AArch64::FMULXv4f32:
6864 case AArch64::FMULXv2f64:
6865 // -- SVE instructions --
6866 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6867 // in the SVE instruction set (though there are predicated ones).
6868 case AArch64::FADD_ZZZ_H:
6869 case AArch64::FADD_ZZZ_S:
6870 case AArch64::FADD_ZZZ_D:
6871 case AArch64::FMUL_ZZZ_H:
6872 case AArch64::FMUL_ZZZ_S:
6873 case AArch64::FMUL_ZZZ_D:
6874 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
6875 Inst.getFlag(MachineInstr::MIFlag::FmNsz);
6876
6877 // == Integer types ==
6878 // -- Base instructions --
6879 // Opcodes MULWrr and MULXrr don't exist because
6880 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6881 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6882 // The machine-combiner does not support three-source-operand machine
6883 // instructions, so we cannot reassociate MULs.
6884 case AArch64::ADDWrr:
6885 case AArch64::ADDXrr:
6886 case AArch64::ANDWrr:
6887 case AArch64::ANDXrr:
6888 case AArch64::ORRWrr:
6889 case AArch64::ORRXrr:
6890 case AArch64::EORWrr:
6891 case AArch64::EORXrr:
6892 case AArch64::EONWrr:
6893 case AArch64::EONXrr:
6894 // -- Advanced SIMD instructions --
6895 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6896 // in the Advanced SIMD instruction set.
6897 case AArch64::ADDv8i8:
6898 case AArch64::ADDv16i8:
6899 case AArch64::ADDv4i16:
6900 case AArch64::ADDv8i16:
6901 case AArch64::ADDv2i32:
6902 case AArch64::ADDv4i32:
6903 case AArch64::ADDv1i64:
6904 case AArch64::ADDv2i64:
6905 case AArch64::MULv8i8:
6906 case AArch64::MULv16i8:
6907 case AArch64::MULv4i16:
6908 case AArch64::MULv8i16:
6909 case AArch64::MULv2i32:
6910 case AArch64::MULv4i32:
6911 case AArch64::ANDv8i8:
6912 case AArch64::ANDv16i8:
6913 case AArch64::ORRv8i8:
6914 case AArch64::ORRv16i8:
6915 case AArch64::EORv8i8:
6916 case AArch64::EORv16i8:
6917 // -- SVE instructions --
6918 case AArch64::ADD_ZZZ_B:
6919 case AArch64::ADD_ZZZ_H:
6920 case AArch64::ADD_ZZZ_S:
6921 case AArch64::ADD_ZZZ_D:
6922 case AArch64::MUL_ZZZ_B:
6923 case AArch64::MUL_ZZZ_H:
6924 case AArch64::MUL_ZZZ_S:
6925 case AArch64::MUL_ZZZ_D:
6926 case AArch64::AND_ZZZ:
6927 case AArch64::ORR_ZZZ:
6928 case AArch64::EOR_ZZZ:
6929 return true;
6930
6931 default:
6932 return false;
6933 }
6934}
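// Illustrative example: when an FADDDrr carries both the reassoc and nsz
// fast-math flags it is reported as associative and commutative above, so the
// machine combiner may rebalance (a + b) + c into a + (b + c) to shorten the
// dependency chain.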
6935
6936/// Find instructions that can be turned into madd.
6937static bool getMaddPatterns(MachineInstr &Root,
6938 SmallVectorImpl<unsigned> &Patterns) {
6939 unsigned Opc = Root.getOpcode();
6940 MachineBasicBlock &MBB = *Root.getParent();
6941 bool Found = false;
6942
6943 if (!isCombineInstrCandidate(Opc))
6944 return false;
6945 if (isCombineInstrSettingFlag(Opc)) {
6946 int Cmp_NZCV =
6947 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6948 // When NZCV is live, bail out.
6949 if (Cmp_NZCV == -1)
6950 return false;
6951 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6952 // When the opcode can't change, bail out.
6953 // CHECKME: do we miss any cases for opcode conversion?
6954 if (NewOpc == Opc)
6955 return false;
6956 Opc = NewOpc;
6957 }
6958
6959 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6960 unsigned Pattern) {
6961 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6962 Patterns.push_back(Pattern);
6963 Found = true;
6964 }
6965 };
6966
6967 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6968 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6969 Patterns.push_back(Pattern);
6970 Found = true;
6971 }
6972 };
6973
6974 typedef AArch64MachineCombinerPattern MCP;
6975
6976 switch (Opc) {
6977 default:
6978 break;
6979 case AArch64::ADDWrr:
6980 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6981 "ADDWrr does not have register operands");
6982 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6983 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6984 break;
6985 case AArch64::ADDXrr:
6986 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6987 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6988 break;
6989 case AArch64::SUBWrr:
6990 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6991 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6992 break;
6993 case AArch64::SUBXrr:
6994 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6995 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6996 break;
6997 case AArch64::ADDWri:
6998 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6999 break;
7000 case AArch64::ADDXri:
7001 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
7002 break;
7003 case AArch64::SUBWri:
7004 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
7005 break;
7006 case AArch64::SUBXri:
7007 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
7008 break;
7009 case AArch64::ADDv8i8:
7010 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7011 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7012 break;
7013 case AArch64::ADDv16i8:
7014 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7015 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7016 break;
7017 case AArch64::ADDv4i16:
7018 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7019 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7020 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7021 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7022 break;
7023 case AArch64::ADDv8i16:
7024 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7025 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7026 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7027 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7028 break;
7029 case AArch64::ADDv2i32:
7030 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7031 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7032 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7033 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7034 break;
7035 case AArch64::ADDv4i32:
7036 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7037 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7038 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7039 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7040 break;
7041 case AArch64::SUBv8i8:
7042 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7043 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7044 break;
7045 case AArch64::SUBv16i8:
7046 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7047 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7048 break;
7049 case AArch64::SUBv4i16:
7050 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7051 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7052 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7053 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7054 break;
7055 case AArch64::SUBv8i16:
7056 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7057 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7058 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7059 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7060 break;
7061 case AArch64::SUBv2i32:
7062 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7063 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7064 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7065 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7066 break;
7067 case AArch64::SUBv4i32:
7068 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7069 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7070 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7071 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7072 break;
7073 }
7074 return Found;
7075}
7076
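// Illustrative note (not from the original source): every opcode listed below
// accumulates into its destination register, e.g. UABAv8i8 computes
// dst += |a - b| per element, which is what allows the machine combiner to
// reassociate long dependency chains of such instructions.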
7077bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7078 switch (Opcode) {
7079 default:
7080 break;
7081 case AArch64::UABALB_ZZZ_D:
7082 case AArch64::UABALB_ZZZ_H:
7083 case AArch64::UABALB_ZZZ_S:
7084 case AArch64::UABALT_ZZZ_D:
7085 case AArch64::UABALT_ZZZ_H:
7086 case AArch64::UABALT_ZZZ_S:
7087 case AArch64::SABALB_ZZZ_D:
7088 case AArch64::SABALB_ZZZ_S:
7089 case AArch64::SABALB_ZZZ_H:
7090 case AArch64::SABALT_ZZZ_D:
7091 case AArch64::SABALT_ZZZ_S:
7092 case AArch64::SABALT_ZZZ_H:
7093 case AArch64::UABALv16i8_v8i16:
7094 case AArch64::UABALv2i32_v2i64:
7095 case AArch64::UABALv4i16_v4i32:
7096 case AArch64::UABALv4i32_v2i64:
7097 case AArch64::UABALv8i16_v4i32:
7098 case AArch64::UABALv8i8_v8i16:
7099 case AArch64::UABAv16i8:
7100 case AArch64::UABAv2i32:
7101 case AArch64::UABAv4i16:
7102 case AArch64::UABAv4i32:
7103 case AArch64::UABAv8i16:
7104 case AArch64::UABAv8i8:
7105 case AArch64::SABALv16i8_v8i16:
7106 case AArch64::SABALv2i32_v2i64:
7107 case AArch64::SABALv4i16_v4i32:
7108 case AArch64::SABALv4i32_v2i64:
7109 case AArch64::SABALv8i16_v4i32:
7110 case AArch64::SABALv8i8_v8i16:
7111 case AArch64::SABAv16i8:
7112 case AArch64::SABAv2i32:
7113 case AArch64::SABAv4i16:
7114 case AArch64::SABAv4i32:
7115 case AArch64::SABAv8i16:
7116 case AArch64::SABAv8i8:
7117 return true;
7118 }
7119
7120 return false;
7121}
7122
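// Illustrative note (not from the original source): this maps an accumulating
// opcode to its non-accumulating counterpart so that a fresh chain can be
// started, e.g. UABAv8i8 (dst += |a - b|) maps to UABDv8i8 (dst = |a - b|);
// the partial accumulators are later summed using the opcode returned by
// getReduceOpcodeForAccumulator().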
7123unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7124 unsigned AccumulationOpcode) const {
7125 switch (AccumulationOpcode) {
7126 default:
7127 llvm_unreachable("Unsupported accumulation Opcode!");
7128 case AArch64::UABALB_ZZZ_D:
7129 return AArch64::UABDLB_ZZZ_D;
7130 case AArch64::UABALB_ZZZ_H:
7131 return AArch64::UABDLB_ZZZ_H;
7132 case AArch64::UABALB_ZZZ_S:
7133 return AArch64::UABDLB_ZZZ_S;
7134 case AArch64::UABALT_ZZZ_D:
7135 return AArch64::UABDLT_ZZZ_D;
7136 case AArch64::UABALT_ZZZ_H:
7137 return AArch64::UABDLT_ZZZ_H;
7138 case AArch64::UABALT_ZZZ_S:
7139 return AArch64::UABDLT_ZZZ_S;
7140 case AArch64::UABALv16i8_v8i16:
7141 return AArch64::UABDLv16i8_v8i16;
7142 case AArch64::UABALv2i32_v2i64:
7143 return AArch64::UABDLv2i32_v2i64;
7144 case AArch64::UABALv4i16_v4i32:
7145 return AArch64::UABDLv4i16_v4i32;
7146 case AArch64::UABALv4i32_v2i64:
7147 return AArch64::UABDLv4i32_v2i64;
7148 case AArch64::UABALv8i16_v4i32:
7149 return AArch64::UABDLv8i16_v4i32;
7150 case AArch64::UABALv8i8_v8i16:
7151 return AArch64::UABDLv8i8_v8i16;
7152 case AArch64::UABAv16i8:
7153 return AArch64::UABDv16i8;
7154 case AArch64::UABAv2i32:
7155 return AArch64::UABDv2i32;
7156 case AArch64::UABAv4i16:
7157 return AArch64::UABDv4i16;
7158 case AArch64::UABAv4i32:
7159 return AArch64::UABDv4i32;
7160 case AArch64::UABAv8i16:
7161 return AArch64::UABDv8i16;
7162 case AArch64::UABAv8i8:
7163 return AArch64::UABDv8i8;
7164 case AArch64::SABALB_ZZZ_D:
7165 return AArch64::SABDLB_ZZZ_D;
7166 case AArch64::SABALB_ZZZ_S:
7167 return AArch64::SABDLB_ZZZ_S;
7168 case AArch64::SABALB_ZZZ_H:
7169 return AArch64::SABDLB_ZZZ_H;
7170 case AArch64::SABALT_ZZZ_D:
7171 return AArch64::SABDLT_ZZZ_D;
7172 case AArch64::SABALT_ZZZ_S:
7173 return AArch64::SABDLT_ZZZ_S;
7174 case AArch64::SABALT_ZZZ_H:
7175 return AArch64::SABDLT_ZZZ_H;
7176 case AArch64::SABALv16i8_v8i16:
7177 return AArch64::SABDLv16i8_v8i16;
7178 case AArch64::SABALv2i32_v2i64:
7179 return AArch64::SABDLv2i32_v2i64;
7180 case AArch64::SABALv4i16_v4i32:
7181 return AArch64::SABDLv4i16_v4i32;
7182 case AArch64::SABALv4i32_v2i64:
7183 return AArch64::SABDLv4i32_v2i64;
7184 case AArch64::SABALv8i16_v4i32:
7185 return AArch64::SABDLv8i16_v4i32;
7186 case AArch64::SABALv8i8_v8i16:
7187 return AArch64::SABDLv8i8_v8i16;
7188 case AArch64::SABAv16i8:
7189 return AArch64::SABDv16i8;
7190 case AArch64::SABAv2i32:
7191 return AArch64::SABDv2i32;
7192 case AArch64::SABAv4i16:
7193 return AArch64::SABDv4i16;
7194 case AArch64::SABAv4i32:
7195 return AArch64::SABDv4i32;
7196 case AArch64::SABAv8i16:
7197 return AArch64::SABDv8i16;
7198 case AArch64::SABAv8i8:
7199 return AArch64::SABDv8i8;
7200 }
7201}
7202
7203/// Floating-Point Support
7204
7205/// Find instructions that can be turned into madd.
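// Illustrative example (not from the original source):
//   %i = FMULSrr %a, %b
//   %r = FADDSrr %i, %c
// matches FMULADDS_OP1 below and is later rewritten by
// genAlternativeCodeSequence() into a single FMADDSrrr %a, %b, %c.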
7206 static bool getFMAPatterns(MachineInstr &Root,
7207 SmallVectorImpl<unsigned> &Patterns) {
7208
7209 if (!isCombineInstrCandidateFP(Root))
7210 return false;
7211
7212 MachineBasicBlock &MBB = *Root.getParent();
7213 bool Found = false;
7214
7215 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7216 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7217 Patterns.push_back(Pattern);
7218 return true;
7219 }
7220 return false;
7221 };
7222
7223 typedef AArch64MachineCombinerPattern MCP;
7224
7225 switch (Root.getOpcode()) {
7226 default:
7227 assert(false && "Unsupported FP instruction in combiner\n");
7228 break;
7229 case AArch64::FADDHrr:
7230 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7231 "FADDHrr does not have register operands");
7232
7233 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7234 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7235 break;
7236 case AArch64::FADDSrr:
7237 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7238 "FADDSrr does not have register operands");
7239
7240 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7241 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7242
7243 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7244 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7245 break;
7246 case AArch64::FADDDrr:
7247 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7248 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7249
7250 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7251 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7252 break;
7253 case AArch64::FADDv4f16:
7254 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7255 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7256
7257 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7258 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7259 break;
7260 case AArch64::FADDv8f16:
7261 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7262 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7263
7264 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7265 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7266 break;
7267 case AArch64::FADDv2f32:
7268 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7269 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7270
7271 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7272 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7273 break;
7274 case AArch64::FADDv2f64:
7275 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7276 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7277
7278 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7279 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7280 break;
7281 case AArch64::FADDv4f32:
7282 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7283 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7284
7285 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7286 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7287 break;
7288 case AArch64::FSUBHrr:
7289 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7290 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7291 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7292 break;
7293 case AArch64::FSUBSrr:
7294 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7295
7296 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7297 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7298
7299 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7300 break;
7301 case AArch64::FSUBDrr:
7302 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7303
7304 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7305 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7306
7307 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7308 break;
7309 case AArch64::FSUBv4f16:
7310 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7311 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7312
7313 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7314 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7315 break;
7316 case AArch64::FSUBv8f16:
7317 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7318 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7319
7320 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7321 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7322 break;
7323 case AArch64::FSUBv2f32:
7324 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7325 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7326
7327 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7328 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7329 break;
7330 case AArch64::FSUBv2f64:
7331 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7332 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7333
7334 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7335 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7336 break;
7337 case AArch64::FSUBv4f32:
7338 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7339 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7340
7341 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7342 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7343 break;
7344 }
7345 return Found;
7346}
7347
7348 static bool getFMULPatterns(MachineInstr &Root,
7349 SmallVectorImpl<unsigned> &Patterns) {
7350 MachineBasicBlock &MBB = *Root.getParent();
7351 bool Found = false;
7352
7353 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7354 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7355 MachineOperand &MO = Root.getOperand(Operand);
7356 MachineInstr *MI = nullptr;
7357 if (MO.isReg() && MO.getReg().isVirtual())
7358 MI = MRI.getUniqueVRegDef(MO.getReg());
7359 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7360 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7361 MI->getOperand(1).getReg().isVirtual())
7362 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7363 if (MI && MI->getOpcode() == Opcode) {
7364 Patterns.push_back(Pattern);
7365 return true;
7366 }
7367 return false;
7368 };
7369
7370 typedef AArch64MachineCombinerPattern MCP;
7371
7372 switch (Root.getOpcode()) {
7373 default:
7374 return false;
7375 case AArch64::FMULv2f32:
7376 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7377 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7378 break;
7379 case AArch64::FMULv2f64:
7380 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7381 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7382 break;
7383 case AArch64::FMULv4f16:
7384 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7385 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7386 break;
7387 case AArch64::FMULv4f32:
7388 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7389 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7390 break;
7391 case AArch64::FMULv8f16:
7392 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7393 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7394 break;
7395 }
7396
7397 return Found;
7398}
7399
7400 static bool getFNEGPatterns(MachineInstr &Root,
7401 SmallVectorImpl<unsigned> &Patterns) {
7402 unsigned Opc = Root.getOpcode();
7403 MachineBasicBlock &MBB = *Root.getParent();
7404 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7405
7406 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7407 MachineOperand &MO = Root.getOperand(1);
7408 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7409 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7410 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7414 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7415 Patterns.push_back(Pattern);
7416 return true;
7417 }
7418 return false;
7419 };
7420
7421 switch (Opc) {
7422 default:
7423 break;
7424 case AArch64::FNEGDr:
7425 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7426 case AArch64::FNEGSr:
7427 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7428 }
7429
7430 return false;
7431}
7432
7433/// Return true when a code sequence can improve throughput. It
7434/// should be called only for instructions in loops.
7435/// \param Pattern - combiner pattern
7436 bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7437 switch (Pattern) {
7438 default:
7439 break;
7545 return true;
7546 } // end switch (Pattern)
7547 return false;
7548}
7549
7550/// Find other MI combine patterns.
7551 static bool getMiscPatterns(MachineInstr &Root,
7552 SmallVectorImpl<unsigned> &Patterns) {
7553 // A - (B + C) ==> (A - B) - C or (A - C) - B
7554 unsigned Opc = Root.getOpcode();
7555 MachineBasicBlock &MBB = *Root.getParent();
7556
7557 switch (Opc) {
7558 case AArch64::SUBWrr:
7559 case AArch64::SUBSWrr:
7560 case AArch64::SUBXrr:
7561 case AArch64::SUBSXrr:
7562 // Found candidate root.
7563 break;
7564 default:
7565 return false;
7566 }
7567
7568 if (isCombineInstrSettingFlag(Opc) &&
7569 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7570 -1)
7571 return false;
7572
7573 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7574 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7575 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7576 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7577 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
7578 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
7579 return true;
7580 }
7581
7582 return false;
7583}
7584
7585/// Check if the given instruction forms a gather load pattern that can be
7586/// optimized for better Memory-Level Parallelism (MLP). This function
7587/// identifies chains of NEON lane load instructions that load data from
7588/// different memory addresses into individual lanes of a 128-bit vector
7589/// register, then attempts to split the pattern into parallel loads to break
7590/// the serial dependency between instructions.
7591///
7592/// Pattern Matched:
7593/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7594/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7595///
7596/// Transformed Into:
7597/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7598/// to combine the results, enabling better memory-level parallelism.
7599///
7600/// Supported Element Types:
7601/// - 32-bit elements (LD1i32, 4 lanes total)
7602/// - 16-bit elements (LD1i16, 8 lanes total)
7603/// - 8-bit elements (LD1i8, 16 lanes total)
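// Illustrative 32-bit example (register names are hypothetical, not from the
// original source):
//   %v0 = SUBREG_TO_REG 0, (LDRSui %p0, 0), ssub   ; lane 0
//   %v1 = LD1i32 %v0, 1, %p1
//   %v2 = LD1i32 %v1, 2, %p2
//   %v3 = LD1i32 %v2, 3, %p3                       ; Root (last lane)
// Each LD1i32 depends on the previous one; the rewrite builds lanes 0-1 and
// lanes 2-3 independently and merges them with ZIP1v2i64.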
7604 static bool getGatherLanePattern(MachineInstr &Root,
7605 SmallVectorImpl<unsigned> &Patterns,
7606 unsigned LoadLaneOpCode, unsigned NumLanes) {
7607 const MachineFunction *MF = Root.getMF();
7608
7609 // Early exit if optimizing for size.
7610 if (MF->getFunction().hasMinSize())
7611 return false;
7612
7613 const MachineRegisterInfo &MRI = MF->getRegInfo();
7614 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7615
7616 // The root of the pattern must load into the last lane of the vector.
7617 if (Root.getOperand(2).getImm() != NumLanes - 1)
7618 return false;
7619
7620 // Check that we have a load into all lanes except lane 0.
7621 // For each load we also want to check that:
7622 // 1. It has a single non-debug use (since we will be replacing the virtual
7623 // register)
7624 // 2. The addressing mode only uses a single pointer operand
7625 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7626 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7627 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7629 while (!RemainingLanes.empty() && CurrInstr &&
7630 CurrInstr->getOpcode() == LoadLaneOpCode &&
7631 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7632 CurrInstr->getNumOperands() == 4) {
7633 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7634 LoadInstrs.push_back(CurrInstr);
7635 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7636 }
7637
7638 // Check that we have found a match for lanes N-1..1.
7639 if (!RemainingLanes.empty())
7640 return false;
7641
7642 // Match the SUBREG_TO_REG sequence.
7643 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7644 return false;
7645
7646 // Verify that the subreg to reg loads an integer into the first lane.
7647 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7648 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7649 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7650 return false;
7651
7652 // Verify that it also has a single non-debug use.
7653 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7654 return false;
7655
7656 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7657
7658 // If there is any chance of aliasing, do not apply the pattern.
7659 // Walk backward through the MBB starting from Root.
7660 // Exit early if we've encountered all load instructions or hit the search
7661 // limit.
7662 auto MBBItr = Root.getIterator();
7663 unsigned RemainingSteps = GatherOptSearchLimit;
7664 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7665 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7666 const MachineBasicBlock *MBB = Root.getParent();
7667
7668 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7669 !RemainingLoadInstrs.empty();
7670 --MBBItr, --RemainingSteps) {
7671 const MachineInstr &CurrInstr = *MBBItr;
7672
7673 // Remove this instruction from remaining loads if it's one we're tracking.
7674 RemainingLoadInstrs.erase(&CurrInstr);
7675
7676 // Check for potential aliasing with any of the load instructions to
7677 // optimize.
7678 if (CurrInstr.isLoadFoldBarrier())
7679 return false;
7680 }
7681
7682 // If we hit the search limit without finding all load instructions,
7683 // don't match the pattern.
7684 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7685 return false;
7686
7687 switch (NumLanes) {
7688 case 4:
7690 break;
7691 case 8:
7693 break;
7694 case 16:
7696 break;
7697 default:
7698 llvm_unreachable("Got bad number of lanes for gather pattern.");
7699 }
7700
7701 return true;
7702}
7703
7704/// Search for patterns of LD instructions we can optimize.
7705 static bool getLoadPatterns(MachineInstr &Root,
7706 SmallVectorImpl<unsigned> &Patterns) {
7707
7708 // The pattern searches for loads into single lanes.
7709 switch (Root.getOpcode()) {
7710 case AArch64::LD1i32:
7711 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7712 case AArch64::LD1i16:
7713 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7714 case AArch64::LD1i8:
7715 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7716 default:
7717 return false;
7718 }
7719}
7720
7721/// Generate optimized instruction sequence for gather load patterns to improve
7722/// Memory-Level Parallelism (MLP). This function transforms a chain of
7723/// sequential NEON lane loads into parallel vector loads that can execute
7724/// concurrently.
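// Sketch of the emitted sequence for NumLanes == 4 (register and pointer names
// are hypothetical, not from the original source):
//   %a1  = LD1i32 %lane0, 1, %p1        ; first half: lanes 0-1
//   %b   = LDRSui %p2, 0                ; integer load zeroes the upper lanes
//   %b0  = SUBREG_TO_REG 0, %b, ssub
//   %b1  = LD1i32 %b0, 1, %p3           ; second half: lanes 2-3
//   %dst = ZIP1v2i64 %a1, %b1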
7725static void
7729 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7730 unsigned Pattern, unsigned NumLanes) {
7731 MachineFunction &MF = *Root.getParent()->getParent();
7732 MachineRegisterInfo &MRI = MF.getRegInfo();
7733 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7734
7735 // Gather the initial load instructions to build the pattern.
7736 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7737 MachineInstr *CurrInstr = &Root;
7738 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7739 LoadToLaneInstrs.push_back(CurrInstr);
7740 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7741 }
7742
7743 // Sort the load instructions according to the lane.
7744 llvm::sort(LoadToLaneInstrs,
7745 [](const MachineInstr *A, const MachineInstr *B) {
7746 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7747 });
7748
7749 MachineInstr *SubregToReg = CurrInstr;
7750 LoadToLaneInstrs.push_back(
7751 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7752 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7753
7754 const TargetRegisterClass *FPR128RegClass =
7755 MRI.getRegClass(Root.getOperand(0).getReg());
7756
7757 // Helper lambda to create a LD1 instruction.
7758 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7759 Register SrcRegister, unsigned Lane,
7760 Register OffsetRegister,
7761 bool OffsetRegisterKillState) {
7762 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7763 MachineInstrBuilder LoadIndexIntoRegister =
7764 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7765 NewRegister)
7766 .addReg(SrcRegister)
7767 .addImm(Lane)
7768 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
7769 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7770 InsInstrs.push_back(LoadIndexIntoRegister);
7771 return NewRegister;
7772 };
7773
7774 // Helper to create load instruction based on the NumLanes in the NEON
7775 // register we are rewriting.
7776 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
7777 Register OffsetReg,
7778 bool KillState) -> MachineInstrBuilder {
7779 unsigned Opcode;
7780 switch (NumLanes) {
7781 case 4:
7782 Opcode = AArch64::LDRSui;
7783 break;
7784 case 8:
7785 Opcode = AArch64::LDRHui;
7786 break;
7787 case 16:
7788 Opcode = AArch64::LDRBui;
7789 break;
7790 default:
7792 "Got unsupported number of lanes in machine-combiner gather pattern");
7793 }
7794 // Immediate offset load
7795 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7796 .addReg(OffsetReg)
7797 .addImm(0);
7798 };
7799
7800 // Load the remaining lanes into register 0.
7801 auto LanesToLoadToReg0 =
7802 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7803 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7804 Register PrevReg = SubregToReg->getOperand(0).getReg();
7805 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7806 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7807 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7808 OffsetRegOperand.getReg(),
7809 OffsetRegOperand.isKill());
7810 DelInstrs.push_back(LoadInstr);
7811 }
7812 Register LastLoadReg0 = PrevReg;
7813
7814 // First load into register 1. Perform an integer load to zero out the upper
7815 // lanes in a single instruction.
7816 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7817 MachineInstr *OriginalSplitLoad =
7818 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7819 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
7820 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7821
7822 const MachineOperand &OriginalSplitToLoadOffsetOperand =
7823 OriginalSplitLoad->getOperand(3);
7824 MachineInstrBuilder MiddleIndexLoadInstr =
7825 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
7826 OriginalSplitToLoadOffsetOperand.getReg(),
7827 OriginalSplitToLoadOffsetOperand.isKill());
7828
7829 InstrIdxForVirtReg.insert(
7830 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7831 InsInstrs.push_back(MiddleIndexLoadInstr);
7832 DelInstrs.push_back(OriginalSplitLoad);
7833
7834 // Subreg To Reg instruction for register 1.
7835 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7836 unsigned SubregType;
7837 switch (NumLanes) {
7838 case 4:
7839 SubregType = AArch64::ssub;
7840 break;
7841 case 8:
7842 SubregType = AArch64::hsub;
7843 break;
7844 case 16:
7845 SubregType = AArch64::bsub;
7846 break;
7847 default:
7849 "Got invalid NumLanes for machine-combiner gather pattern");
7850 }
7851
7852 auto SubRegToRegInstr =
7853 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7854 DestRegForSubregToReg)
7855 .addImm(0)
7856 .addReg(DestRegForMiddleIndex, getKillRegState(true))
7857 .addImm(SubregType);
7858 InstrIdxForVirtReg.insert(
7859 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7860 InsInstrs.push_back(SubRegToRegInstr);
7861
7862 // Load remaining lanes into register 1.
7863 auto LanesToLoadToReg1 =
7864 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7865 LoadToLaneInstrsAscending.end());
7866 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7867 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7868 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7869 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7870 OffsetRegOperand.getReg(),
7871 OffsetRegOperand.isKill());
7872
7873 // Do not add the last reg to DelInstrs - it will be removed later.
7874 if (Index == NumLanes / 2 - 2) {
7875 break;
7876 }
7877 DelInstrs.push_back(LoadInstr);
7878 }
7879 Register LastLoadReg1 = PrevReg;
7880
7881 // Create the final zip instruction to combine the results.
7882 MachineInstrBuilder ZipInstr =
7883 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7884 Root.getOperand(0).getReg())
7885 .addReg(LastLoadReg0)
7886 .addReg(LastLoadReg1);
7887 InsInstrs.push_back(ZipInstr);
7888}
7889
7903
7904/// Return true when there is potentially a faster code sequence for an
7905/// instruction chain ending in \p Root. All potential patterns are listed in
7906/// the \p Pattern vector. Pattern should be sorted in priority order since the
7907/// pattern evaluator stops checking as soon as it finds a faster sequence.
7908
7909bool AArch64InstrInfo::getMachineCombinerPatterns(
7910 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7911 bool DoRegPressureReduce) const {
7912 // Integer patterns
7913 if (getMaddPatterns(Root, Patterns))
7914 return true;
7915 // Floating point patterns
7916 if (getFMULPatterns(Root, Patterns))
7917 return true;
7918 if (getFMAPatterns(Root, Patterns))
7919 return true;
7920 if (getFNEGPatterns(Root, Patterns))
7921 return true;
7922
7923 // Other patterns
7924 if (getMiscPatterns(Root, Patterns))
7925 return true;
7926
7927 // Load patterns
7928 if (getLoadPatterns(Root, Patterns))
7929 return true;
7930
7931 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7932 DoRegPressureReduce);
7933}
7934
7936/// genFusedMultiply - Generate fused multiply instructions.
7937/// This function supports both integer and floating point instructions.
7938/// A typical example:
7939/// F|MUL I=A,B,0
7940/// F|ADD R,I,C
7941/// ==> F|MADD R,A,B,C
7942/// \param MF Containing MachineFunction
7943/// \param MRI Register information
7944/// \param TII Target information
7945/// \param Root is the F|ADD instruction
7946/// \param [out] InsInstrs is a vector of machine instructions and will
7947/// contain the generated madd instruction
7948/// \param IdxMulOpd is index of operand in Root that is the result of
7949/// the F|MUL. In the example above IdxMulOpd is 1.
7950/// \param MaddOpc the opcode of the f|madd instruction
7951/// \param RC Register class of operands
7952/// \param kind of fma instruction (addressing mode) to be generated
7953/// \param ReplacedAddend is the result register from the instruction
7954/// replacing the non-combined operand, if any.
7955static MachineInstr *
7956 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
7957 const TargetInstrInfo *TII, MachineInstr &Root,
7958 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7959 unsigned MaddOpc, const TargetRegisterClass *RC,
7960 FMAInstKind kind = FMAInstKind::Default,
7961 const Register *ReplacedAddend = nullptr) {
7962 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7963
7964 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7965 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7966 Register ResultReg = Root.getOperand(0).getReg();
7967 Register SrcReg0 = MUL->getOperand(1).getReg();
7968 bool Src0IsKill = MUL->getOperand(1).isKill();
7969 Register SrcReg1 = MUL->getOperand(2).getReg();
7970 bool Src1IsKill = MUL->getOperand(2).isKill();
7971
7972 Register SrcReg2;
7973 bool Src2IsKill;
7974 if (ReplacedAddend) {
7975 // If we just generated a new addend, we must be its only use.
7976 SrcReg2 = *ReplacedAddend;
7977 Src2IsKill = true;
7978 } else {
7979 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7980 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7981 }
7982
7983 if (ResultReg.isVirtual())
7984 MRI.constrainRegClass(ResultReg, RC);
7985 if (SrcReg0.isVirtual())
7986 MRI.constrainRegClass(SrcReg0, RC);
7987 if (SrcReg1.isVirtual())
7988 MRI.constrainRegClass(SrcReg1, RC);
7989 if (SrcReg2.isVirtual())
7990 MRI.constrainRegClass(SrcReg2, RC);
7991
7993 if (kind == FMAInstKind::Default)
7994 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7995 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7996 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7997 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7998 else if (kind == FMAInstKind::Indexed)
7999 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8000 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8001 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8002 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8003 .addImm(MUL->getOperand(3).getImm());
8004 else if (kind == FMAInstKind::Accumulator)
8005 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8006 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8007 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8008 .addReg(SrcReg1, getKillRegState(Src1IsKill));
8009 else
8010 assert(false && "Invalid FMA instruction kind \n");
8011 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
8012 InsInstrs.push_back(MIB);
8013 return MUL;
8014}
8015
8016static MachineInstr *
8018 const TargetInstrInfo *TII, MachineInstr &Root,
8019 SmallVectorImpl<MachineInstr *> &InsInstrs) {
8020 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8021
8022 unsigned Opc = 0;
8023 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8024 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8025 Opc = AArch64::FNMADDSrrr;
8026 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8027 Opc = AArch64::FNMADDDrrr;
8028 else
8029 return nullptr;
8030
8031 Register ResultReg = Root.getOperand(0).getReg();
8032 Register SrcReg0 = MAD->getOperand(1).getReg();
8033 Register SrcReg1 = MAD->getOperand(2).getReg();
8034 Register SrcReg2 = MAD->getOperand(3).getReg();
8035 bool Src0IsKill = MAD->getOperand(1).isKill();
8036 bool Src1IsKill = MAD->getOperand(2).isKill();
8037 bool Src2IsKill = MAD->getOperand(3).isKill();
8038 if (ResultReg.isVirtual())
8039 MRI.constrainRegClass(ResultReg, RC);
8040 if (SrcReg0.isVirtual())
8041 MRI.constrainRegClass(SrcReg0, RC);
8042 if (SrcReg1.isVirtual())
8043 MRI.constrainRegClass(SrcReg1, RC);
8044 if (SrcReg2.isVirtual())
8045 MRI.constrainRegClass(SrcReg2, RC);
8046
8047 MachineInstrBuilder MIB =
8048 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8049 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8050 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8051 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8052 InsInstrs.push_back(MIB);
8053
8054 return MAD;
8055}
8056
8057/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
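// Illustrative example (not from the original source):
//   %d = DUPv4i32lane %q, 1
//   %r = FMULv4f32 %x, %d
// is rewritten into
//   %r = FMULv4i32_indexed %x, %q, 1
// and the DUP becomes dead once it has no other users.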
8058static MachineInstr *
8061 unsigned IdxDupOp, unsigned MulOpc,
8063 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8064 "Invalid index of FMUL operand");
8065
8066 MachineFunction &MF = *Root.getMF();
8067 MachineRegisterInfo &MRI = MF.getRegInfo();
8068
8069 MachineInstr *Dup =
8070 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8071
8072 if (Dup->getOpcode() == TargetOpcode::COPY)
8073 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8074
8075 Register DupSrcReg = Dup->getOperand(1).getReg();
8076 MRI.clearKillFlags(DupSrcReg);
8077 MRI.constrainRegClass(DupSrcReg, RC);
8078
8079 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8080
8081 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8082 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8083
8084 Register ResultReg = Root.getOperand(0).getReg();
8085
8087 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8088 .add(MulOp)
8089 .addReg(DupSrcReg)
8090 .addImm(DupSrcLane);
8091
8092 InsInstrs.push_back(MIB);
8093 return &Root;
8094}
8095
8096/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8097/// instructions.
8098///
8099/// \see genFusedMultiply
8103 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8104 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8106}
8107
8108/// genNeg - Helper to generate an intermediate negation of the second operand
8109/// of Root
8110 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
8111 const TargetInstrInfo *TII, MachineInstr &Root,
8112 SmallVectorImpl<MachineInstr *> &InsInstrs,
8113 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8114 unsigned MnegOpc, const TargetRegisterClass *RC) {
8115 Register NewVR = MRI.createVirtualRegister(RC);
8117 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8118 .add(Root.getOperand(2));
8119 InsInstrs.push_back(MIB);
8120
8121 assert(InstrIdxForVirtReg.empty());
8122 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8123
8124 return NewVR;
8125}
8126
8127/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8128/// instructions with an additional negation of the accumulator
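// Illustrative example for the MULSUBv8i8_OP1 pattern (not from the original
// source):
//   %i = MULv8i8 %a, %b
//   %r = SUBv8i8 %i, %c
// becomes
//   %n = NEGv8i8 %c
//   %r = MLAv8i8 %n, %a, %b            ; %r = %n + %a*%b = %a*%b - %c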
8132 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8133 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8134 assert(IdxMulOpd == 1);
8135
8136 Register NewVR =
8137 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8138 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8139 FMAInstKind::Accumulator, &NewVR);
8140}
8141
8142/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8143/// instructions.
8144///
8145/// \see genFusedMultiply
8149 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8150 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8152}
8153
8154/// genFusedMultiplyIdxNeg - Helper to generate indexed fused multiply
8155/// accumulate instructions with an additional negation of the accumulator
8159 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8160 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8161 assert(IdxMulOpd == 1);
8162
8163 Register NewVR =
8164 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8165
8166 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8167 FMAInstKind::Indexed, &NewVR);
8168}
8169
8170/// genMaddR - Generate madd instruction and combine mul and add using
8171/// an extra virtual register
8172/// Example - an ADD intermediate needs to be stored in a register:
8173/// MUL I=A,B,0
8174/// ADD R,I,Imm
8175/// ==> ORR V, ZR, Imm
8176/// ==> MADD R,A,B,V
8177/// \param MF Containing MachineFunction
8178/// \param MRI Register information
8179/// \param TII Target information
8180/// \param Root is the ADD instruction
8181/// \param [out] InsInstrs is a vector of machine instructions and will
8182/// contain the generated madd instruction
8183/// \param IdxMulOpd is index of operand in Root that is the result of
8184/// the MUL. In the example above IdxMulOpd is 1.
8185/// \param MaddOpc the opcode of the madd instruction
8186/// \param VR is a virtual register that holds the value of an ADD operand
8187/// (V in the example above).
8188/// \param RC Register class of operands
8189 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
8190 const TargetInstrInfo *TII, MachineInstr &Root,
8191 SmallVectorImpl<MachineInstr *> &InsInstrs,
8192 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8193 const TargetRegisterClass *RC) {
8194 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8195
8196 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8197 Register ResultReg = Root.getOperand(0).getReg();
8198 Register SrcReg0 = MUL->getOperand(1).getReg();
8199 bool Src0IsKill = MUL->getOperand(1).isKill();
8200 Register SrcReg1 = MUL->getOperand(2).getReg();
8201 bool Src1IsKill = MUL->getOperand(2).isKill();
8202
8203 if (ResultReg.isVirtual())
8204 MRI.constrainRegClass(ResultReg, RC);
8205 if (SrcReg0.isVirtual())
8206 MRI.constrainRegClass(SrcReg0, RC);
8207 if (SrcReg1.isVirtual())
8208 MRI.constrainRegClass(SrcReg1, RC);
8210 MRI.constrainRegClass(VR, RC);
8211
8213 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8214 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8215 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8216 .addReg(VR);
8217 // Insert the MADD
8218 InsInstrs.push_back(MIB);
8219 return MUL;
8220}
8221
8222/// Do the following transformation
8223/// A - (B + C) ==> (A - B) - C
8224/// A - (B + C) ==> (A - C) - B
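// Illustrative example with IdxOpd1 == 1 (not from the original source):
//   %t = ADDWrr %b, %c
//   %r = SUBWrr %a, %t
// becomes
//   %n = SUBWrr %a, %b
//   %r = SUBWrr %n, %c
// The nsw/nuw flags are cleared below because the intermediate subtraction may
// wrap differently from the original expression.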
8226 const TargetInstrInfo *TII, MachineInstr &Root,
8229 unsigned IdxOpd1,
8230 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8231 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8232 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8233 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8234
8235 Register ResultReg = Root.getOperand(0).getReg();
8236 Register RegA = Root.getOperand(1).getReg();
8237 bool RegAIsKill = Root.getOperand(1).isKill();
8238 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8239 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8240 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8241 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8242 Register NewVR =
8243 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8244
8245 unsigned Opcode = Root.getOpcode();
8246 if (Opcode == AArch64::SUBSWrr)
8247 Opcode = AArch64::SUBWrr;
8248 else if (Opcode == AArch64::SUBSXrr)
8249 Opcode = AArch64::SUBXrr;
8250 else
8251 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8252 "Unexpected instruction opcode.");
8253
8254 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8255 Flags &= ~MachineInstr::NoSWrap;
8256 Flags &= ~MachineInstr::NoUWrap;
8257
8258 MachineInstrBuilder MIB1 =
8259 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8260 .addReg(RegA, getKillRegState(RegAIsKill))
8261 .addReg(RegB, getKillRegState(RegBIsKill))
8262 .setMIFlags(Flags);
8263 MachineInstrBuilder MIB2 =
8264 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8265 .addReg(NewVR, getKillRegState(true))
8266 .addReg(RegC, getKillRegState(RegCIsKill))
8267 .setMIFlags(Flags);
8268
8269 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8270 InsInstrs.push_back(MIB1);
8271 InsInstrs.push_back(MIB2);
8272 DelInstrs.push_back(AddMI);
8273 DelInstrs.push_back(&Root);
8274}
8275
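// Illustrative note (not from the original source): when an accumulation chain
// (see isAccumulationOpcode and getAccumulationStartOpcode above) is split into
// independent partial accumulators, they are summed with the plain vector or
// SVE ADD returned here, e.g. UABAv4i32 maps to ADDv4i32 and UABALB_ZZZ_D maps
// to ADD_ZZZ_D.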
8276unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8277 unsigned int AccumulatorOpCode) const {
8278 switch (AccumulatorOpCode) {
8279 case AArch64::UABALB_ZZZ_D:
8280 case AArch64::SABALB_ZZZ_D:
8281 case AArch64::UABALT_ZZZ_D:
8282 case AArch64::SABALT_ZZZ_D:
8283 return AArch64::ADD_ZZZ_D;
8284 case AArch64::UABALB_ZZZ_H:
8285 case AArch64::SABALB_ZZZ_H:
8286 case AArch64::UABALT_ZZZ_H:
8287 case AArch64::SABALT_ZZZ_H:
8288 return AArch64::ADD_ZZZ_H;
8289 case AArch64::UABALB_ZZZ_S:
8290 case AArch64::SABALB_ZZZ_S:
8291 case AArch64::UABALT_ZZZ_S:
8292 case AArch64::SABALT_ZZZ_S:
8293 return AArch64::ADD_ZZZ_S;
8294 case AArch64::UABALv16i8_v8i16:
8295 case AArch64::SABALv8i8_v8i16:
8296 case AArch64::SABAv8i16:
8297 case AArch64::UABAv8i16:
8298 return AArch64::ADDv8i16;
8299 case AArch64::SABALv2i32_v2i64:
8300 case AArch64::UABALv2i32_v2i64:
8301 case AArch64::SABALv4i32_v2i64:
8302 return AArch64::ADDv2i64;
8303 case AArch64::UABALv4i16_v4i32:
8304 case AArch64::SABALv4i16_v4i32:
8305 case AArch64::SABALv8i16_v4i32:
8306 case AArch64::SABAv4i32:
8307 case AArch64::UABAv4i32:
8308 return AArch64::ADDv4i32;
8309 case AArch64::UABALv4i32_v2i64:
8310 return AArch64::ADDv2i64;
8311 case AArch64::UABALv8i16_v4i32:
8312 return AArch64::ADDv4i32;
8313 case AArch64::UABALv8i8_v8i16:
8314 case AArch64::SABALv16i8_v8i16:
8315 return AArch64::ADDv8i16;
8316 case AArch64::UABAv16i8:
8317 case AArch64::SABAv16i8:
8318 return AArch64::ADDv16i8;
8319 case AArch64::UABAv4i16:
8320 case AArch64::SABAv4i16:
8321 return AArch64::ADDv4i16;
8322 case AArch64::UABAv2i32:
8323 case AArch64::SABAv2i32:
8324 return AArch64::ADDv2i32;
8325 case AArch64::UABAv8i8:
8326 case AArch64::SABAv8i8:
8327 return AArch64::ADDv8i8;
8328 default:
8329 llvm_unreachable("Unknown accumulator opcode");
8330 }
8331}
8332
8333/// When getMachineCombinerPatterns() finds potential patterns,
8334/// this function generates the instructions that could replace the
8335/// original code sequence
8336void AArch64InstrInfo::genAlternativeCodeSequence(
8337 MachineInstr &Root, unsigned Pattern,
8340 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8341 MachineBasicBlock &MBB = *Root.getParent();
8342 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8343 MachineFunction &MF = *MBB.getParent();
8344 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8345
8346 MachineInstr *MUL = nullptr;
8347 const TargetRegisterClass *RC;
8348 unsigned Opc;
8349 switch (Pattern) {
8350 default:
8351 // Reassociate instructions.
8352 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8353 DelInstrs, InstrIdxForVirtReg);
8354 return;
8356 // A - (B + C)
8357 // ==> (A - B) - C
8358 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8359 InstrIdxForVirtReg);
8360 return;
8362 // A - (B + C)
8363 // ==> (A - C) - B
8364 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8365 InstrIdxForVirtReg);
8366 return;
8369 // MUL I=A,B,0
8370 // ADD R,I,C
8371 // ==> MADD R,A,B,C
8372 // --- Create(MADD);
8374 Opc = AArch64::MADDWrrr;
8375 RC = &AArch64::GPR32RegClass;
8376 } else {
8377 Opc = AArch64::MADDXrrr;
8378 RC = &AArch64::GPR64RegClass;
8379 }
8380 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8381 break;
8384 // MUL I=A,B,0
8385 // ADD R,C,I
8386 // ==> MADD R,A,B,C
8387 // --- Create(MADD);
8389 Opc = AArch64::MADDWrrr;
8390 RC = &AArch64::GPR32RegClass;
8391 } else {
8392 Opc = AArch64::MADDXrrr;
8393 RC = &AArch64::GPR64RegClass;
8394 }
8395 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8396 break;
8401 // MUL I=A,B,0
8402 // ADD/SUB R,I,Imm
8403 // ==> MOV V, Imm/-Imm
8404 // ==> MADD R,A,B,V
8405 // --- Create(MADD);
8406 const TargetRegisterClass *RC;
8407 unsigned BitSize, MovImm;
8410 MovImm = AArch64::MOVi32imm;
8411 RC = &AArch64::GPR32spRegClass;
8412 BitSize = 32;
8413 Opc = AArch64::MADDWrrr;
8414 RC = &AArch64::GPR32RegClass;
8415 } else {
8416 MovImm = AArch64::MOVi64imm;
8417 RC = &AArch64::GPR64spRegClass;
8418 BitSize = 64;
8419 Opc = AArch64::MADDXrrr;
8420 RC = &AArch64::GPR64RegClass;
8421 }
8422 Register NewVR = MRI.createVirtualRegister(RC);
8423 uint64_t Imm = Root.getOperand(2).getImm();
8424
8425 if (Root.getOperand(3).isImm()) {
8426 unsigned Val = Root.getOperand(3).getImm();
8427 Imm = Imm << Val;
8428 }
8429 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8431 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8432 // Check that the immediate can be composed via a single instruction.
8434 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8435 if (Insn.size() != 1)
8436 return;
8437 MachineInstrBuilder MIB1 =
8438 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8439 .addImm(IsSub ? -Imm : Imm);
8440 InsInstrs.push_back(MIB1);
8441 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8442 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8443 break;
8444 }
8447 // MUL I=A,B,0
8448 // SUB R,I, C
8449 // ==> SUB V, 0, C
8450 // ==> MADD R,A,B,V // = -C + A*B
8451 // --- Create(MADD);
8452 const TargetRegisterClass *SubRC;
8453 unsigned SubOpc, ZeroReg;
8455 SubOpc = AArch64::SUBWrr;
8456 SubRC = &AArch64::GPR32spRegClass;
8457 ZeroReg = AArch64::WZR;
8458 Opc = AArch64::MADDWrrr;
8459 RC = &AArch64::GPR32RegClass;
8460 } else {
8461 SubOpc = AArch64::SUBXrr;
8462 SubRC = &AArch64::GPR64spRegClass;
8463 ZeroReg = AArch64::XZR;
8464 Opc = AArch64::MADDXrrr;
8465 RC = &AArch64::GPR64RegClass;
8466 }
8467 Register NewVR = MRI.createVirtualRegister(SubRC);
8468 // SUB NewVR, 0, C
8469 MachineInstrBuilder MIB1 =
8470 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8471 .addReg(ZeroReg)
8472 .add(Root.getOperand(2));
8473 InsInstrs.push_back(MIB1);
8474 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8475 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8476 break;
8477 }
8480 // MUL I=A,B,0
8481 // SUB R,C,I
8482 // ==> MSUB R,A,B,C (computes C - A*B)
8483 // --- Create(MSUB);
8485 Opc = AArch64::MSUBWrrr;
8486 RC = &AArch64::GPR32RegClass;
8487 } else {
8488 Opc = AArch64::MSUBXrrr;
8489 RC = &AArch64::GPR64RegClass;
8490 }
8491 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8492 break;
8494 Opc = AArch64::MLAv8i8;
8495 RC = &AArch64::FPR64RegClass;
8496 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8497 break;
8499 Opc = AArch64::MLAv8i8;
8500 RC = &AArch64::FPR64RegClass;
8501 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8502 break;
8504 Opc = AArch64::MLAv16i8;
8505 RC = &AArch64::FPR128RegClass;
8506 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8507 break;
8509 Opc = AArch64::MLAv16i8;
8510 RC = &AArch64::FPR128RegClass;
8511 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8512 break;
8514 Opc = AArch64::MLAv4i16;
8515 RC = &AArch64::FPR64RegClass;
8516 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8517 break;
8519 Opc = AArch64::MLAv4i16;
8520 RC = &AArch64::FPR64RegClass;
8521 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8522 break;
8524 Opc = AArch64::MLAv8i16;
8525 RC = &AArch64::FPR128RegClass;
8526 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8527 break;
8529 Opc = AArch64::MLAv8i16;
8530 RC = &AArch64::FPR128RegClass;
8531 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8532 break;
8534 Opc = AArch64::MLAv2i32;
8535 RC = &AArch64::FPR64RegClass;
8536 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8537 break;
8539 Opc = AArch64::MLAv2i32;
8540 RC = &AArch64::FPR64RegClass;
8541 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8542 break;
8544 Opc = AArch64::MLAv4i32;
8545 RC = &AArch64::FPR128RegClass;
8546 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8547 break;
8549 Opc = AArch64::MLAv4i32;
8550 RC = &AArch64::FPR128RegClass;
8551 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8552 break;
8553
8555 Opc = AArch64::MLAv8i8;
8556 RC = &AArch64::FPR64RegClass;
8557 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8558 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8559 RC);
8560 break;
8562 Opc = AArch64::MLSv8i8;
8563 RC = &AArch64::FPR64RegClass;
8564 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8565 break;
8567 Opc = AArch64::MLAv16i8;
8568 RC = &AArch64::FPR128RegClass;
8569 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8570 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8571 RC);
8572 break;
8574 Opc = AArch64::MLSv16i8;
8575 RC = &AArch64::FPR128RegClass;
8576 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8577 break;
8579 Opc = AArch64::MLAv4i16;
8580 RC = &AArch64::FPR64RegClass;
8581 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8582 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8583 RC);
8584 break;
8586 Opc = AArch64::MLSv4i16;
8587 RC = &AArch64::FPR64RegClass;
8588 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8589 break;
8591 Opc = AArch64::MLAv8i16;
8592 RC = &AArch64::FPR128RegClass;
8593 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8594 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8595 RC);
8596 break;
8598 Opc = AArch64::MLSv8i16;
8599 RC = &AArch64::FPR128RegClass;
8600 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8601 break;
8603 Opc = AArch64::MLAv2i32;
8604 RC = &AArch64::FPR64RegClass;
8605 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8606 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8607 RC);
8608 break;
8610 Opc = AArch64::MLSv2i32;
8611 RC = &AArch64::FPR64RegClass;
8612 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8613 break;
8615 Opc = AArch64::MLAv4i32;
8616 RC = &AArch64::FPR128RegClass;
8617 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8618 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8619 RC);
8620 break;
8622 Opc = AArch64::MLSv4i32;
8623 RC = &AArch64::FPR128RegClass;
8624 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8625 break;
8626
8628 Opc = AArch64::MLAv4i16_indexed;
8629 RC = &AArch64::FPR64RegClass;
8630 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8631 break;
8633 Opc = AArch64::MLAv4i16_indexed;
8634 RC = &AArch64::FPR64RegClass;
8635 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8636 break;
8638 Opc = AArch64::MLAv8i16_indexed;
8639 RC = &AArch64::FPR128RegClass;
8640 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8641 break;
8643 Opc = AArch64::MLAv8i16_indexed;
8644 RC = &AArch64::FPR128RegClass;
8645 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8646 break;
8648 Opc = AArch64::MLAv2i32_indexed;
8649 RC = &AArch64::FPR64RegClass;
8650 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8651 break;
8653 Opc = AArch64::MLAv2i32_indexed;
8654 RC = &AArch64::FPR64RegClass;
8655 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8656 break;
8658 Opc = AArch64::MLAv4i32_indexed;
8659 RC = &AArch64::FPR128RegClass;
8660 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8661 break;
8663 Opc = AArch64::MLAv4i32_indexed;
8664 RC = &AArch64::FPR128RegClass;
8665 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8666 break;
8667
8669 Opc = AArch64::MLAv4i16_indexed;
8670 RC = &AArch64::FPR64RegClass;
8671 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8672 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8673 RC);
8674 break;
8676 Opc = AArch64::MLSv4i16_indexed;
8677 RC = &AArch64::FPR64RegClass;
8678 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8679 break;
8681 Opc = AArch64::MLAv8i16_indexed;
8682 RC = &AArch64::FPR128RegClass;
8683 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8684 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8685 RC);
8686 break;
8688 Opc = AArch64::MLSv8i16_indexed;
8689 RC = &AArch64::FPR128RegClass;
8690 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8691 break;
8693 Opc = AArch64::MLAv2i32_indexed;
8694 RC = &AArch64::FPR64RegClass;
8695 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8696 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8697 RC);
8698 break;
8700 Opc = AArch64::MLSv2i32_indexed;
8701 RC = &AArch64::FPR64RegClass;
8702 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8703 break;
8705 Opc = AArch64::MLAv4i32_indexed;
8706 RC = &AArch64::FPR128RegClass;
8707 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8708 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8709 RC);
8710 break;
8712 Opc = AArch64::MLSv4i32_indexed;
8713 RC = &AArch64::FPR128RegClass;
8714 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8715 break;
8716
8717 // Floating Point Support
8719 Opc = AArch64::FMADDHrrr;
8720 RC = &AArch64::FPR16RegClass;
8721 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8722 break;
8724 Opc = AArch64::FMADDSrrr;
8725 RC = &AArch64::FPR32RegClass;
8726 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8727 break;
8729 Opc = AArch64::FMADDDrrr;
8730 RC = &AArch64::FPR64RegClass;
8731 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8732 break;
8733
8735 Opc = AArch64::FMADDHrrr;
8736 RC = &AArch64::FPR16RegClass;
8737 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8738 break;
8740 Opc = AArch64::FMADDSrrr;
8741 RC = &AArch64::FPR32RegClass;
8742 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8743 break;
8745 Opc = AArch64::FMADDDrrr;
8746 RC = &AArch64::FPR64RegClass;
8747 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8748 break;
8749
8751 Opc = AArch64::FMLAv1i32_indexed;
8752 RC = &AArch64::FPR32RegClass;
8753 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8755 break;
8757 Opc = AArch64::FMLAv1i32_indexed;
8758 RC = &AArch64::FPR32RegClass;
8759 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8761 break;
8762
8764 Opc = AArch64::FMLAv1i64_indexed;
8765 RC = &AArch64::FPR64RegClass;
8766 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8768 break;
8770 Opc = AArch64::FMLAv1i64_indexed;
8771 RC = &AArch64::FPR64RegClass;
8772 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8774 break;
8775
8777 RC = &AArch64::FPR64RegClass;
8778 Opc = AArch64::FMLAv4i16_indexed;
8779 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8781 break;
8783 RC = &AArch64::FPR64RegClass;
8784 Opc = AArch64::FMLAv4f16;
8785 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8787 break;
8789 RC = &AArch64::FPR64RegClass;
8790 Opc = AArch64::FMLAv4i16_indexed;
8791 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8793 break;
8795 RC = &AArch64::FPR64RegClass;
8796 Opc = AArch64::FMLAv4f16;
8797 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8799 break;
8800
8803 RC = &AArch64::FPR64RegClass;
8805 Opc = AArch64::FMLAv2i32_indexed;
8806 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8808 } else {
8809 Opc = AArch64::FMLAv2f32;
8810 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8812 }
8813 break;
8816 RC = &AArch64::FPR64RegClass;
8818 Opc = AArch64::FMLAv2i32_indexed;
8819 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8821 } else {
8822 Opc = AArch64::FMLAv2f32;
8823 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8825 }
8826 break;
8827
8829 RC = &AArch64::FPR128RegClass;
8830 Opc = AArch64::FMLAv8i16_indexed;
8831 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8833 break;
8835 RC = &AArch64::FPR128RegClass;
8836 Opc = AArch64::FMLAv8f16;
8837 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8839 break;
8841 RC = &AArch64::FPR128RegClass;
8842 Opc = AArch64::FMLAv8i16_indexed;
8843 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8845 break;
8847 RC = &AArch64::FPR128RegClass;
8848 Opc = AArch64::FMLAv8f16;
8849 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8851 break;
8852
8855 RC = &AArch64::FPR128RegClass;
8857 Opc = AArch64::FMLAv2i64_indexed;
8858 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8860 } else {
8861 Opc = AArch64::FMLAv2f64;
8862 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8864 }
8865 break;
8868 RC = &AArch64::FPR128RegClass;
8870 Opc = AArch64::FMLAv2i64_indexed;
8871 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8873 } else {
8874 Opc = AArch64::FMLAv2f64;
8875 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8877 }
8878 break;
8879
8882 RC = &AArch64::FPR128RegClass;
8884 Opc = AArch64::FMLAv4i32_indexed;
8885 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8887 } else {
8888 Opc = AArch64::FMLAv4f32;
8889 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8891 }
8892 break;
8893
8896 RC = &AArch64::FPR128RegClass;
8898 Opc = AArch64::FMLAv4i32_indexed;
8899 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8901 } else {
8902 Opc = AArch64::FMLAv4f32;
8903 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8905 }
8906 break;
8907
8909 Opc = AArch64::FNMSUBHrrr;
8910 RC = &AArch64::FPR16RegClass;
8911 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8912 break;
8914 Opc = AArch64::FNMSUBSrrr;
8915 RC = &AArch64::FPR32RegClass;
8916 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8917 break;
8919 Opc = AArch64::FNMSUBDrrr;
8920 RC = &AArch64::FPR64RegClass;
8921 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8922 break;
8923
8925 Opc = AArch64::FNMADDHrrr;
8926 RC = &AArch64::FPR16RegClass;
8927 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8928 break;
8930 Opc = AArch64::FNMADDSrrr;
8931 RC = &AArch64::FPR32RegClass;
8932 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8933 break;
8935 Opc = AArch64::FNMADDDrrr;
8936 RC = &AArch64::FPR64RegClass;
8937 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8938 break;
8939
8941 Opc = AArch64::FMSUBHrrr;
8942 RC = &AArch64::FPR16RegClass;
8943 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8944 break;
8946 Opc = AArch64::FMSUBSrrr;
8947 RC = &AArch64::FPR32RegClass;
8948 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8949 break;
8951 Opc = AArch64::FMSUBDrrr;
8952 RC = &AArch64::FPR64RegClass;
8953 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8954 break;
8955
8957 Opc = AArch64::FMLSv1i32_indexed;
8958 RC = &AArch64::FPR32RegClass;
8959 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8961 break;
8962
8964 Opc = AArch64::FMLSv1i64_indexed;
8965 RC = &AArch64::FPR64RegClass;
8966 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8968 break;
8969
8972 RC = &AArch64::FPR64RegClass;
8973 Register NewVR = MRI.createVirtualRegister(RC);
8974 MachineInstrBuilder MIB1 =
8975 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8976 .add(Root.getOperand(2));
8977 InsInstrs.push_back(MIB1);
8978 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8980 Opc = AArch64::FMLAv4f16;
8981 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8982 FMAInstKind::Accumulator, &NewVR);
8983 } else {
8984 Opc = AArch64::FMLAv4i16_indexed;
8985 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8986 FMAInstKind::Indexed, &NewVR);
8987 }
8988 break;
8989 }
8991 RC = &AArch64::FPR64RegClass;
8992 Opc = AArch64::FMLSv4f16;
8993 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8995 break;
8997 RC = &AArch64::FPR64RegClass;
8998 Opc = AArch64::FMLSv4i16_indexed;
8999 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9001 break;
9002
9005 RC = &AArch64::FPR64RegClass;
9007 Opc = AArch64::FMLSv2i32_indexed;
9008 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9010 } else {
9011 Opc = AArch64::FMLSv2f32;
9012 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9014 }
9015 break;
9016
9019 RC = &AArch64::FPR128RegClass;
9020 Register NewVR = MRI.createVirtualRegister(RC);
9021 MachineInstrBuilder MIB1 =
9022 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9023 .add(Root.getOperand(2));
9024 InsInstrs.push_back(MIB1);
9025 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9027 Opc = AArch64::FMLAv8f16;
9028 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9029 FMAInstKind::Accumulator, &NewVR);
9030 } else {
9031 Opc = AArch64::FMLAv8i16_indexed;
9032 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9033 FMAInstKind::Indexed, &NewVR);
9034 }
9035 break;
9036 }
9038 RC = &AArch64::FPR128RegClass;
9039 Opc = AArch64::FMLSv8f16;
9040 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9042 break;
9044 RC = &AArch64::FPR128RegClass;
9045 Opc = AArch64::FMLSv8i16_indexed;
9046 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9048 break;
9049
9052 RC = &AArch64::FPR128RegClass;
9054 Opc = AArch64::FMLSv2i64_indexed;
9055 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9057 } else {
9058 Opc = AArch64::FMLSv2f64;
9059 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9061 }
9062 break;
9063
9066 RC = &AArch64::FPR128RegClass;
9068 Opc = AArch64::FMLSv4i32_indexed;
9069 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9071 } else {
9072 Opc = AArch64::FMLSv4f32;
9073 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9075 }
9076 break;
9079 RC = &AArch64::FPR64RegClass;
9080 Register NewVR = MRI.createVirtualRegister(RC);
9081 MachineInstrBuilder MIB1 =
9082 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9083 .add(Root.getOperand(2));
9084 InsInstrs.push_back(MIB1);
9085 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9087 Opc = AArch64::FMLAv2i32_indexed;
9088 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9089 FMAInstKind::Indexed, &NewVR);
9090 } else {
9091 Opc = AArch64::FMLAv2f32;
9092 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9093 FMAInstKind::Accumulator, &NewVR);
9094 }
9095 break;
9096 }
9099 RC = &AArch64::FPR128RegClass;
9100 Register NewVR = MRI.createVirtualRegister(RC);
9101 MachineInstrBuilder MIB1 =
9102 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9103 .add(Root.getOperand(2));
9104 InsInstrs.push_back(MIB1);
9105 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9107 Opc = AArch64::FMLAv4i32_indexed;
9108 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9109 FMAInstKind::Indexed, &NewVR);
9110 } else {
9111 Opc = AArch64::FMLAv4f32;
9112 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9113 FMAInstKind::Accumulator, &NewVR);
9114 }
9115 break;
9116 }
9119 RC = &AArch64::FPR128RegClass;
9120 Register NewVR = MRI.createVirtualRegister(RC);
9121 MachineInstrBuilder MIB1 =
9122 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9123 .add(Root.getOperand(2));
9124 InsInstrs.push_back(MIB1);
9125 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9127 Opc = AArch64::FMLAv2i64_indexed;
9128 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9129 FMAInstKind::Indexed, &NewVR);
9130 } else {
9131 Opc = AArch64::FMLAv2f64;
9132 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9133 FMAInstKind::Accumulator, &NewVR);
9134 }
9135 break;
9136 }
9139 unsigned IdxDupOp =
9141 : 2;
9142 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9143 &AArch64::FPR128RegClass, MRI);
9144 break;
9145 }
9148 unsigned IdxDupOp =
9150 : 2;
9151 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9152 &AArch64::FPR128RegClass, MRI);
9153 break;
9154 }
9157 unsigned IdxDupOp =
9159 : 2;
9160 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9161 &AArch64::FPR128_loRegClass, MRI);
9162 break;
9163 }
9166 unsigned IdxDupOp =
9168 : 2;
9169 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9170 &AArch64::FPR128RegClass, MRI);
9171 break;
9172 }
9175 unsigned IdxDupOp =
9177 : 2;
9178 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9179 &AArch64::FPR128_loRegClass, MRI);
9180 break;
9181 }
9183 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9184 break;
9185 }
9187 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9188 Pattern, 4);
9189 break;
9190 }
9192 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9193 Pattern, 8);
9194 break;
9195 }
9197 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9198 Pattern, 16);
9199 break;
9200 }
9201
9202 } // end switch (Pattern)
9203 // Record MUL and ADD/SUB for deletion
9204 if (MUL)
9205 DelInstrs.push_back(MUL);
9206 DelInstrs.push_back(&Root);
9207
9208 // Set the flags on the inserted instructions to be the merged flags of the
9209 // instructions that we have combined.
9210 uint32_t Flags = Root.getFlags();
9211 if (MUL)
9212 Flags = Root.mergeFlagsWith(*MUL);
9213 for (auto *MI : InsInstrs)
9214 MI->setFlags(Flags);
9215}
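// Illustrative sketch (annotation, not part of the original file) of what the
// scalar fused-multiply rewrite above produces. Given MIR along the lines of
//   %3:fpr32 = FMULSrr %1, %2
//   %4:fpr32 = FADDSrr %0, %3
// the combiner emits the fused form
//   %4:fpr32 = FMADDSrr %1, %2, %0
// and records the FMUL in DelInstrs so that both original instructions are
// deleted once the new sequence is kept.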
9216
9217/// Replace csincr-branch sequence by simple conditional branch
9218///
9219/// Examples:
9220/// 1. \code
9221/// csinc w9, wzr, wzr, <condition code>
9222/// tbnz w9, #0, 0x44
9223/// \endcode
9224/// to
9225/// \code
9226/// b.<inverted condition code>
9227/// \endcode
9228///
9229/// 2. \code
9230/// csinc w9, wzr, wzr, <condition code>
9231/// tbz w9, #0, 0x44
9232/// \endcode
9233/// to
9234/// \code
9235/// b.<condition code>
9236/// \endcode
9237///
9238/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9239 /// compare's constant operand is a power of 2.
9240///
9241/// Examples:
9242/// \code
9243/// and w8, w8, #0x400
9244/// cbnz w8, L1
9245/// \endcode
9246/// to
9247/// \code
9248/// tbnz w8, #10, L1
9249/// \endcode
9250///
9251/// \param MI Conditional Branch
9252/// \return True when the simple conditional branch is generated
9253///
9254 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
9255 bool IsNegativeBranch = false;
9256 bool IsTestAndBranch = false;
9257 unsigned TargetBBInMI = 0;
9258 switch (MI.getOpcode()) {
9259 default:
9260 llvm_unreachable("Unknown branch instruction?");
9261 case AArch64::Bcc:
9262 case AArch64::CBWPri:
9263 case AArch64::CBXPri:
9264 case AArch64::CBWPrr:
9265 case AArch64::CBXPrr:
9266 return false;
9267 case AArch64::CBZW:
9268 case AArch64::CBZX:
9269 TargetBBInMI = 1;
9270 break;
9271 case AArch64::CBNZW:
9272 case AArch64::CBNZX:
9273 TargetBBInMI = 1;
9274 IsNegativeBranch = true;
9275 break;
9276 case AArch64::TBZW:
9277 case AArch64::TBZX:
9278 TargetBBInMI = 2;
9279 IsTestAndBranch = true;
9280 break;
9281 case AArch64::TBNZW:
9282 case AArch64::TBNZX:
9283 TargetBBInMI = 2;
9284 IsNegativeBranch = true;
9285 IsTestAndBranch = true;
9286 break;
9287 }
9288 // So we increment a zero register and test for bits other
9289 // than bit 0? Conservatively bail out in case the verifier
9290 // missed this case.
9291 if (IsTestAndBranch && MI.getOperand(1).getImm())
9292 return false;
9293
9294 // Find Definition.
9295 assert(MI.getParent() && "Incomplete machine instruction\n");
9296 MachineBasicBlock *MBB = MI.getParent();
9297 MachineFunction *MF = MBB->getParent();
9298 MachineRegisterInfo *MRI = &MF->getRegInfo();
9299 Register VReg = MI.getOperand(0).getReg();
9300 if (!VReg.isVirtual())
9301 return false;
9302
9303 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9304
9305 // Look through COPY instructions to find definition.
9306 while (DefMI->isCopy()) {
9307 Register CopyVReg = DefMI->getOperand(1).getReg();
9308 if (!MRI->hasOneNonDBGUse(CopyVReg))
9309 return false;
9310 if (!MRI->hasOneDef(CopyVReg))
9311 return false;
9312 DefMI = MRI->getVRegDef(CopyVReg);
9313 }
9314
9315 switch (DefMI->getOpcode()) {
9316 default:
9317 return false;
9318 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9319 case AArch64::ANDWri:
9320 case AArch64::ANDXri: {
9321 if (IsTestAndBranch)
9322 return false;
9323 if (DefMI->getParent() != MBB)
9324 return false;
9325 if (!MRI->hasOneNonDBGUse(VReg))
9326 return false;
9327
9328 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9329 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
9330 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9331 if (!isPowerOf2_64(Mask))
9332 return false;
9333
9334 MachineOperand &MO = DefMI->getOperand(1);
9335 Register NewReg = MO.getReg();
9336 if (!NewReg.isVirtual())
9337 return false;
9338
9339 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9340
9341 MachineBasicBlock &RefToMBB = *MBB;
9342 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9343 DebugLoc DL = MI.getDebugLoc();
9344 unsigned Imm = Log2_64(Mask);
9345 unsigned Opc = (Imm < 32)
9346 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9347 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9348 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9349 .addReg(NewReg)
9350 .addImm(Imm)
9351 .addMBB(TBB);
9352 // Register lives on to the TB(N)Z now.
9353 MO.setIsKill(false);
9354
9355 // For immediates smaller than 32, we need to use the 32-bit
9356 // variant (W) in all cases, because the 64-bit variant cannot
9357 // encode them.
9358 // Therefore, if the input register is 64-bit, we need to take its
9359 // 32-bit sub-register.
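// Illustrative example (annotation, not original source) of the 64-bit case:
//   and x8, x9, #0x10
//   cbnz x8, L1
// Here the mask is 1 << 4, so Imm = 4 < 32 and the W-form test of the 32-bit
// sub-register is emitted instead:
//   tbnz w9, #4, L1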
9360 if (!Is32Bit && Imm < 32)
9361 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9362 MI.eraseFromParent();
9363 return true;
9364 }
9365 // Look for CSINC
9366 case AArch64::CSINCWr:
9367 case AArch64::CSINCXr: {
9368 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9369 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9370 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9371 DefMI->getOperand(2).getReg() == AArch64::XZR))
9372 return false;
9373
9374 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9375 true) != -1)
9376 return false;
9377
9378 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9379 // Convert only when the condition code is not modified between
9380 // the CSINC and the branch. The CC may be used by other
9381 // instructions in between.
9383 return false;
9384 MachineBasicBlock &RefToMBB = *MBB;
9385 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9386 DebugLoc DL = MI.getDebugLoc();
9387 if (IsNegativeBranch)
9388 CC = AArch64CC::getInvertedCondCode(CC);
9389 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9390 MI.eraseFromParent();
9391 return true;
9392 }
9393 }
9394}
9395
9396std::pair<unsigned, unsigned>
9397AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9398 const unsigned Mask = AArch64II::MO_FRAGMENT;
9399 return std::make_pair(TF & Mask, TF & ~Mask);
9400}
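// Illustrative example (annotation, not original source): MO_FRAGMENT masks
// out the mutually exclusive addressing fragment, so a target flag value of
// (MO_PAGEOFF | MO_NC | MO_GOT) decomposes into the direct flag MO_PAGEOFF
// plus the bitmask flags MO_NC | MO_GOT.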
9401
9402 ArrayRef<std::pair<unsigned, const char *>>
9403 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9404 using namespace AArch64II;
9405
9406 static const std::pair<unsigned, const char *> TargetFlags[] = {
9407 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9408 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9409 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9410 {MO_HI12, "aarch64-hi12"}};
9411 return ArrayRef(TargetFlags);
9412}
9413
9414 ArrayRef<std::pair<unsigned, const char *>>
9415 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9416 using namespace AArch64II;
9417
9418 static const std::pair<unsigned, const char *> TargetFlags[] = {
9419 {MO_COFFSTUB, "aarch64-coffstub"},
9420 {MO_GOT, "aarch64-got"},
9421 {MO_NC, "aarch64-nc"},
9422 {MO_S, "aarch64-s"},
9423 {MO_TLS, "aarch64-tls"},
9424 {MO_DLLIMPORT, "aarch64-dllimport"},
9425 {MO_PREL, "aarch64-prel"},
9426 {MO_TAGGED, "aarch64-tagged"},
9427 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9428 };
9429 return ArrayRef(TargetFlags);
9430}
9431
9432 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
9433 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9434 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9435 {{MOSuppressPair, "aarch64-suppress-pair"},
9436 {MOStridedAccess, "aarch64-strided-access"}};
9437 return ArrayRef(TargetFlags);
9438}
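// Editorial illustration (not original source): these strings are the names
// used when printing and parsing MIR, so a memory operand carrying
// MOSuppressPair is tagged "aarch64-suppress-pair" in MIR dumps (exact MIR
// syntax not reproduced here).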
9439
9440/// Constants defining how certain sequences should be outlined.
9441/// This encompasses how an outlined function should be called, and what kind of
9442/// frame should be emitted for that outlined function.
9443///
9444/// \p MachineOutlinerDefault implies that the function should be called with
9445/// a save and restore of LR to the stack.
9446///
9447/// That is,
9448///
9449/// I1 Save LR OUTLINED_FUNCTION:
9450/// I2 --> BL OUTLINED_FUNCTION I1
9451/// I3 Restore LR I2
9452/// I3
9453/// RET
9454///
9455/// * Call construction overhead: 3 (save + BL + restore)
9456/// * Frame construction overhead: 1 (ret)
9457/// * Requires stack fixups? Yes
9458///
9459/// \p MachineOutlinerTailCall implies that the function is being created from
9460/// a sequence of instructions ending in a return.
9461///
9462/// That is,
9463///
9464/// I1 OUTLINED_FUNCTION:
9465/// I2 --> B OUTLINED_FUNCTION I1
9466/// RET I2
9467/// RET
9468///
9469/// * Call construction overhead: 1 (B)
9470/// * Frame construction overhead: 0 (Return included in sequence)
9471/// * Requires stack fixups? No
9472///
9473/// \p MachineOutlinerNoLRSave implies that the function should be called using
9474/// a BL instruction, but doesn't require LR to be saved and restored. This
9475/// happens when LR is known to be dead.
9476///
9477/// That is,
9478///
9479/// I1 OUTLINED_FUNCTION:
9480/// I2 --> BL OUTLINED_FUNCTION I1
9481/// I3 I2
9482/// I3
9483/// RET
9484///
9485/// * Call construction overhead: 1 (BL)
9486/// * Frame construction overhead: 1 (RET)
9487/// * Requires stack fixups? No
9488///
9489/// \p MachineOutlinerThunk implies that the function is being created from
9490/// a sequence of instructions ending in a call. The outlined function is
9491/// called with a BL instruction, and the outlined function tail-calls the
9492/// original call destination.
9493///
9494/// That is,
9495///
9496/// I1 OUTLINED_FUNCTION:
9497/// I2 --> BL OUTLINED_FUNCTION I1
9498/// BL f I2
9499/// B f
9500/// * Call construction overhead: 1 (BL)
9501/// * Frame construction overhead: 0
9502/// * Requires stack fixups? No
9503///
9504/// \p MachineOutlinerRegSave implies that the function should be called with a
9505/// save and restore of LR to an available register. This allows us to avoid
9506/// stack fixups. Note that this outlining variant is compatible with the
9507/// NoLRSave case.
9508///
9509/// That is,
9510///
9511/// I1 Save LR OUTLINED_FUNCTION:
9512/// I2 --> BL OUTLINED_FUNCTION I1
9513/// I3 Restore LR I2
9514/// I3
9515/// RET
9516///
9517/// * Call construction overhead: 3 (save + BL + restore)
9518/// * Frame construction overhead: 1 (ret)
9519/// * Requires stack fixups? No
9520 enum MachineOutlinerClass {
9521 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9522 MachineOutlinerTailCall, /// Only emit a branch.
9523 MachineOutlinerNoLRSave, /// Emit a call and return.
9524 MachineOutlinerThunk, /// Emit a call and tail-call.
9525 MachineOutlinerRegSave /// Same as default, but save to a register.
9526};
9527
9533
9534 Register
9535AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9536 MachineFunction *MF = C.getMF();
9537 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9538 const AArch64RegisterInfo *ARI =
9539 static_cast<const AArch64RegisterInfo *>(&TRI);
9540 // Check if there is an available register across the sequence that we can
9541 // use.
9542 for (unsigned Reg : AArch64::GPR64RegClass) {
9543 if (!ARI->isReservedReg(*MF, Reg) &&
9544 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9545 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9546 Reg != AArch64::X17 && // Ditto for X17.
9547 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9548 C.isAvailableInsideSeq(Reg, TRI))
9549 return Reg;
9550 }
9551 return Register();
9552}
9553
9554static bool
9555 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
9556 const outliner::Candidate &b) {
9557 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9558 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9559
9560 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
9561 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
9562}
9563
9564static bool
9565 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
9566 const outliner::Candidate &b) {
9567 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9568 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9569
9570 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9571}
9572
9573 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
9574 const outliner::Candidate &b) {
9575 const AArch64Subtarget &SubtargetA =
9576 a.getMF()->getSubtarget<AArch64Subtarget>();
9577 const AArch64Subtarget &SubtargetB =
9578 b.getMF()->getSubtarget<AArch64Subtarget>();
9579 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9580}
9581
9582std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9583AArch64InstrInfo::getOutliningCandidateInfo(
9584 const MachineModuleInfo &MMI,
9585 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9586 unsigned MinRepeats) const {
9587 unsigned SequenceSize = 0;
9588 for (auto &MI : RepeatedSequenceLocs[0])
9589 SequenceSize += getInstSizeInBytes(MI);
9590
9591 unsigned NumBytesToCreateFrame = 0;
9592
9593 // We only allow outlining for functions having exactly matching return
9594 // address signing attributes, i.e., all share the same value for the
9595 // attribute "sign-return-address" and all share the same type of key they
9596 // are signed with.
9597 // Additionally we require all functions to simultaneously either support
9598 // v8.3a features or not. Otherwise an outlined function could get signed
9599 // using dedicated v8.3 instructions and a call from a function that doesn't
9600 // support v8.3 instructions would therefore be invalid.
9601 if (std::adjacent_find(
9602 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9603 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9604 // Return true if a and b are non-equal w.r.t. return address
9605 // signing or support of v8.3a features
9606 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9607 outliningCandidatesSigningKeyConsensus(a, b) &&
9608 outliningCandidatesV8_3OpsConsensus(a, b)) {
9609 return false;
9610 }
9611 return true;
9612 }) != RepeatedSequenceLocs.end()) {
9613 return std::nullopt;
9614 }
9615
9616 // Since at this point all candidates agree on their return address signing
9617 // picking just one is fine. If the candidate functions potentially sign their
9618 // return addresses, the outlined function should do the same. Note that in
9619 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9620 // not certainly true that the outlined function will have to sign its return
9621 // address but this decision is made later, when the decision to outline
9622 // has already been made.
9623 // The same holds for the number of additional instructions we need: On
9624 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9625 // necessary. However, at this point we don't know if the outlined function
9626 // will have a RET instruction so we assume the worst.
9627 const TargetRegisterInfo &TRI = getRegisterInfo();
9628 // Performing a tail call may require extra checks when PAuth is enabled.
9629 // If PAuth is disabled, set it to zero for uniformity.
9630 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9631 if (RepeatedSequenceLocs[0]
9632 .getMF()
9633 ->getInfo<AArch64FunctionInfo>()
9634 ->shouldSignReturnAddress(true)) {
9635 // One PAC and one AUT instruction
9636 NumBytesToCreateFrame += 8;
9637
9638 // PAuth is enabled - set extra tail call cost, if any.
9639 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9640 *RepeatedSequenceLocs[0].getMF());
9641 NumBytesToCheckLRInTCEpilogue =
9642 AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
9643 // Checking the authenticated LR value may significantly impact
9644 // SequenceSize, so account for it for more precise results.
9645 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9646 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9647
9648 // We have to check if SP-modifying instructions would get outlined.
9649 // If so, we only allow outlining if SP is unchanged overall; matching
9650 // sub and add instructions are okay to outline, but all other SP
9651 // modifications are not.
9652 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9653 int SPValue = 0;
9654 for (auto &MI : C) {
9655 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9656 switch (MI.getOpcode()) {
9657 case AArch64::ADDXri:
9658 case AArch64::ADDWri:
9659 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9660 assert(MI.getOperand(2).isImm() &&
9661 "Expected operand to be immediate");
9662 assert(MI.getOperand(1).isReg() &&
9663 "Expected operand to be a register");
9664 // Check if the add just increments sp. If so, we search for
9665 // matching sub instructions that decrement sp. If not, the
9666 // modification is illegal
9667 if (MI.getOperand(1).getReg() == AArch64::SP)
9668 SPValue += MI.getOperand(2).getImm();
9669 else
9670 return true;
9671 break;
9672 case AArch64::SUBXri:
9673 case AArch64::SUBWri:
9674 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9675 assert(MI.getOperand(2).isImm() &&
9676 "Expected operand to be immediate");
9677 assert(MI.getOperand(1).isReg() &&
9678 "Expected operand to be a register");
9679 // Check if the sub just decrements sp. If so, we search for
9680 // matching add instructions that increment sp. If not, the
9681 // modification is illegal
9682 if (MI.getOperand(1).getReg() == AArch64::SP)
9683 SPValue -= MI.getOperand(2).getImm();
9684 else
9685 return true;
9686 break;
9687 default:
9688 return true;
9689 }
9690 }
9691 }
9692 if (SPValue)
9693 return true;
9694 return false;
9695 };
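// Illustrative example (annotation, not original source): a candidate that
// contains the balanced pair
//   sub sp, sp, #48   ...   add sp, sp, #48
// leaves SPValue at 0 and is kept, whereas an unmatched "add sp, sp, #16" or
// any non ADD/SUB-immediate write to SP (e.g. "mov sp, x29") is rejected.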
9696 // Remove candidates with illegal stack modifying instructions
9697 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9698
9699 // If the sequence doesn't have enough candidates left, then we're done.
9700 if (RepeatedSequenceLocs.size() < MinRepeats)
9701 return std::nullopt;
9702 }
9703
9704 // Properties about candidate MBBs that hold for all of them.
9705 unsigned FlagsSetInAll = 0xF;
9706
9707 // Compute liveness information for each candidate, and set FlagsSetInAll.
9708 for (outliner::Candidate &C : RepeatedSequenceLocs)
9709 FlagsSetInAll &= C.Flags;
9710
9711 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9712
9713 // Helper lambda which sets call information for every candidate.
9714 auto SetCandidateCallInfo =
9715 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9716 for (outliner::Candidate &C : RepeatedSequenceLocs)
9717 C.setCallInfo(CallID, NumBytesForCall);
9718 };
9719
9720 unsigned FrameID = MachineOutlinerDefault;
9721 NumBytesToCreateFrame += 4;
9722
9723 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9724 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9725 });
9726
9727 // We check to see if CFI Instructions are present, and if they are
9728 // we find the number of CFI Instructions in the candidates.
9729 unsigned CFICount = 0;
9730 for (auto &I : RepeatedSequenceLocs[0]) {
9731 if (I.isCFIInstruction())
9732 CFICount++;
9733 }
9734
9735 // We compare the number of found CFI Instructions to the number of CFI
9736 // instructions in the parent function for each candidate. We must check this
9737 // since if we outline one of the CFI instructions in a function, we have to
9738 // outline them all for correctness. If we do not, the address offsets will be
9739 // incorrect between the two sections of the program.
9740 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9741 std::vector<MCCFIInstruction> CFIInstructions =
9742 C.getMF()->getFrameInstructions();
9743
9744 if (CFICount > 0 && CFICount != CFIInstructions.size())
9745 return std::nullopt;
9746 }
9747
9749 // Returns true if an instruction is safe to fix up, false otherwise.
9749 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9750 if (MI.isCall())
9751 return true;
9752
9753 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
9754 !MI.readsRegister(AArch64::SP, &TRI))
9755 return true;
9756
9757 // Any modification of SP will break our code to save/restore LR.
9758 // FIXME: We could handle some instructions which add a constant
9759 // offset to SP, with a bit more work.
9760 if (MI.modifiesRegister(AArch64::SP, &TRI))
9761 return false;
9762
9763 // At this point, we have a stack instruction that we might need to
9764 // fix up. We'll handle it if it's a load or store.
9765 if (MI.mayLoadOrStore()) {
9766 const MachineOperand *Base; // Filled with the base operand of MI.
9767 int64_t Offset; // Filled with the offset of MI.
9768 bool OffsetIsScalable;
9769
9770 // Does it allow us to offset the base operand and is the base the
9771 // register SP?
9772 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
9773 !Base->isReg() || Base->getReg() != AArch64::SP)
9774 return false;
9775
9776 // Fix-up code below assumes byte offsets.
9777 if (OffsetIsScalable)
9778 return false;
9779
9780 // Find the minimum/maximum offset for this instruction and check
9781 // if fixing it up would be in range.
9782 int64_t MinOffset,
9783 MaxOffset; // Unscaled offsets for the instruction.
9784 // The scale to multiply the offsets by.
9785 TypeSize Scale(0U, false), DummyWidth(0U, false);
9786 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
9787
9788 Offset += 16; // Update the offset to what it would be if we outlined.
9789 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9790 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9791 return false;
9792
9793 // It's in range, so we can outline it.
9794 return true;
9795 }
9796
9797 // FIXME: Add handling for instructions like "add x0, sp, #8".
9798
9799 // We can't fix it up, so don't outline it.
9800 return false;
9801 };
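// Illustrative example (annotation, not original source): "ldr x0, [sp, #8]"
// is safe to fix up: once outlining spills LR the access becomes
// "ldr x0, [sp, #24]", and 24 is still within the opcode's scaled-immediate
// range. An SP-relative access already at the top of its range would be
// rejected here instead.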
9802
9803 // True if it's possible to fix up each stack instruction in this sequence.
9804 // Important for frames/call variants that modify the stack.
9805 bool AllStackInstrsSafe =
9806 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
9807
9808 // If the last instruction in any candidate is a terminator, then we should
9809 // tail call all of the candidates.
9810 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9811 FrameID = MachineOutlinerTailCall;
9812 NumBytesToCreateFrame = 0;
9813 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9814 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9815 }
9816
9817 else if (LastInstrOpcode == AArch64::BL ||
9818 ((LastInstrOpcode == AArch64::BLR ||
9819 LastInstrOpcode == AArch64::BLRNoIP) &&
9820 !HasBTI)) {
9821 // FIXME: Do we need to check if the code after this uses the value of LR?
9822 FrameID = MachineOutlinerThunk;
9823 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9824 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9825 }
9826
9827 else {
9828 // We need to decide how to emit calls + frames. We can always emit the same
9829 // frame if we don't need to save to the stack. If we have to save to the
9830 // stack, then we need a different frame.
9831 unsigned NumBytesNoStackCalls = 0;
9832 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9833
9834 // Check if we have to save LR.
9835 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9836 bool LRAvailable =
9837 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
9838 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9839 : true;
9840 // If we have a noreturn caller, then we're going to be conservative and
9841 // say that we have to save LR. If we don't have a ret at the end of the
9842 // block, then we can't reason about liveness accurately.
9843 //
9844 // FIXME: We can probably do better than always disabling this in
9845 // noreturn functions by fixing up the liveness info.
9846 bool IsNoReturn =
9847 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9848
9849 // Is LR available? If so, we don't need a save.
9850 if (LRAvailable && !IsNoReturn) {
9851 NumBytesNoStackCalls += 4;
9852 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9853 CandidatesWithoutStackFixups.push_back(C);
9854 }
9855
9856 // Is an unused register available? If so, we won't modify the stack, so
9857 // we can outline with the same frame type as those that don't save LR.
9858 else if (findRegisterToSaveLRTo(C)) {
9859 NumBytesNoStackCalls += 12;
9860 C.setCallInfo(MachineOutlinerRegSave, 12);
9861 CandidatesWithoutStackFixups.push_back(C);
9862 }
9863
9864 // Is SP used in the sequence at all? If not, we don't have to modify
9865 // the stack, so we are guaranteed to get the same frame.
9866 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9867 NumBytesNoStackCalls += 12;
9868 C.setCallInfo(MachineOutlinerDefault, 12);
9869 CandidatesWithoutStackFixups.push_back(C);
9870 }
9871
9872 // If we outline this, we need to modify the stack. Pretend we don't
9873 // outline this by saving all of its bytes.
9874 else {
9875 NumBytesNoStackCalls += SequenceSize;
9876 }
9877 }
9878
9879 // If there are no places where we have to save LR, then note that we
9880 // don't have to update the stack. Otherwise, give every candidate the
9881 // default call type, as long as it's safe to do so.
9882 if (!AllStackInstrsSafe ||
9883 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9884 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9885 FrameID = MachineOutlinerNoLRSave;
9886 if (RepeatedSequenceLocs.size() < MinRepeats)
9887 return std::nullopt;
9888 } else {
9889 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9890
9891 // Bugzilla ID: 46767
9892 // TODO: Check if fixing up the stack more than once is safe so we can
9893 // outline these.
9894 //
9895 // An outline resulting in a caller that requires stack fixups at the
9896 // callsite to a callee that also requires stack fixups can happen when
9897 // there are no available registers at the candidate callsite for a
9898 // candidate that itself also has calls.
9899 //
9900 // In other words if function_containing_sequence in the following pseudo
9901 // assembly requires that we save LR at the point of the call, but there
9902 // are no available registers: in this case we save using SP and as a
9903 // result the SP offsets require stack fixups by multiples of 16.
9904 //
9905 // function_containing_sequence:
9906 // ...
9907 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9908 // call OUTLINED_FUNCTION_N
9909 // restore LR from SP
9910 // ...
9911 //
9912 // OUTLINED_FUNCTION_N:
9913 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9914 // ...
9915 // bl foo
9916 // restore LR from SP
9917 // ret
9918 //
9919 // Because the code to handle more than one stack fixup does not
9920 // currently have the proper checks for legality, these cases will assert
9921 // in the AArch64 MachineOutliner. This is because the code to do this
9922 // needs more hardening, testing, better checks that generated code is
9923 // legal, etc., and because it is only verified to handle a single pass of
9924 // stack fixup.
9925 //
9926 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9927 // these cases until they are known to be handled. Bugzilla 46767 is
9928 // referenced in comments at the assert site.
9929 //
9930 // To avoid asserting (or generating non-legal code on noassert builds)
9931 // we remove all candidates which would need more than one stack fixup by
9932 // pruning the cases where the candidate has calls while also having no
9933 // available LR and having no available general purpose registers to copy
9934 // LR to (ie one extra stack save/restore).
9935 //
9936 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9937 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9938 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9939 return (llvm::any_of(C, IsCall)) &&
9940 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9941 !findRegisterToSaveLRTo(C));
9942 });
9943 }
9944 }
9945
9946 // If we dropped all of the candidates, bail out here.
9947 if (RepeatedSequenceLocs.size() < MinRepeats)
9948 return std::nullopt;
9949 }
9950
9951 // Does every candidate's MBB contain a call? If so, then we might have a call
9952 // in the range.
9953 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9954 // Check if the range contains a call. These require a save + restore of the
9955 // link register.
9956 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9957 bool ModStackToSaveLR = false;
9958 if (any_of(drop_end(FirstCand),
9959 [](const MachineInstr &MI) { return MI.isCall(); }))
9960 ModStackToSaveLR = true;
9961
9962 // Handle the last instruction separately. If this is a tail call, then the
9963 // last instruction is a call. We don't want to save + restore in this case.
9964 // However, it could be possible that the last instruction is a call without
9965 // it being valid to tail call this sequence. We should consider this as
9966 // well.
9967 else if (FrameID != MachineOutlinerThunk &&
9968 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9969 ModStackToSaveLR = true;
9970
9971 if (ModStackToSaveLR) {
9972 // We can't fix up the stack. Bail out.
9973 if (!AllStackInstrsSafe)
9974 return std::nullopt;
9975
9976 // Save + restore LR.
9977 NumBytesToCreateFrame += 8;
9978 }
9979 }
9980
9981 // If we have CFI instructions, we can only outline if the outlined section
9982 // can be a tail call
9983 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9984 return std::nullopt;
9985
9986 return std::make_unique<outliner::OutlinedFunction>(
9987 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9988}
9989
9990void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9991 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9992 // If a bunch of candidates reach this point they must agree on their return
9993 // address signing. It is therefore enough to just consider the signing
9994 // behaviour of one of them.
9995 const auto &CFn = Candidates.front().getMF()->getFunction();
9996
9997 if (CFn.hasFnAttribute("ptrauth-returns"))
9998 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9999 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
10000 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
10001 // Since all candidates belong to the same module, just copy the
10002 // function-level attributes of an arbitrary function.
10003 if (CFn.hasFnAttribute("sign-return-address"))
10004 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
10005 if (CFn.hasFnAttribute("sign-return-address-key"))
10006 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
10007
10008 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
10009}
10010
10011bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10012 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10013 const Function &F = MF.getFunction();
10014
10015 // Can F be deduplicated by the linker? If it can, don't outline from it.
10016 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10017 return false;
10018
10019 // Don't outline from functions with section markings; the program could
10020 // expect that all the code is in the named section.
10021 // FIXME: Allow outlining from multiple functions with the same section
10022 // marking.
10023 if (F.hasSection())
10024 return false;
10025
10026 // Outlining from functions with redzones is unsafe since the outliner may
10027 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10028 // outline from it.
10029 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10030 if (!AFI || AFI->hasRedZone().value_or(true))
10031 return false;
10032
10033 // FIXME: Determine whether it is safe to outline from functions which contain
10034 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10035 // outlined together and ensure it is safe to outline with async unwind info,
10036 // required for saving & restoring VG around calls.
10037 if (AFI->hasStreamingModeChanges())
10038 return false;
10039
10040 // FIXME: Teach the outliner to generate/handle Windows unwind info.
10041 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
10042 return false;
10043
10044 // It's safe to outline from MF.
10045 return true;
10046}
10047
10049AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10050 unsigned &Flags) const {
10052 "Must track liveness!");
10054 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10055 Ranges;
10056 // According to the AArch64 Procedure Call Standard, the following are
10057 // undefined on entry/exit from a function call:
10058 //
10059 // * Registers x16, x17, (and thus w16, w17)
10060 // * Condition codes (and thus the NZCV register)
10061 //
10062 // If any of these registers are used inside or live across an outlined
10063 // function, then they may be modified later, either by the compiler or
10064 // some other tool (like the linker).
10065 //
10066 // To avoid outlining in these situations, partition each block into ranges
10067 // where these registers are dead. We will only outline from those ranges.
10068 LiveRegUnits LRU(getRegisterInfo());
10069 auto AreAllUnsafeRegsDead = [&LRU]() {
10070 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10071 LRU.available(AArch64::NZCV);
10072 };
10073
10074 // We need to know if LR is live across an outlining boundary later on in
10075 // order to decide how we'll create the outlined call, frame, etc.
10076 //
10077 // It's pretty expensive to check this for *every candidate* within a block.
10078 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10079 // to compute liveness from the end of the block for O(n) candidates within
10080 // the block.
10081 //
10082 // So, to improve the average case, let's keep track of liveness from the end
10083 // of the block to the beginning of *every outlinable range*. If we know that
10084 // LR is available in every range we could outline from, then we know that
10085 // we don't need to check liveness for any candidate within that range.
10086 bool LRAvailableEverywhere = true;
10087 // Compute liveness bottom-up.
10088 LRU.addLiveOuts(MBB);
10089 // Update flags that require info about the entire MBB.
10090 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10091 if (MI.isCall() && !MI.isTerminator())
10092 Flags |= MachineOutlinerMBBFlags::HasCalls;
10093 };
10094 // Range: [RangeBegin, RangeEnd)
10095 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10096 unsigned RangeLen;
10097 auto CreateNewRangeStartingAt =
10098 [&RangeBegin, &RangeEnd,
10099 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10100 RangeBegin = NewBegin;
10101 RangeEnd = std::next(RangeBegin);
10102 RangeLen = 0;
10103 };
10104 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10105 // At least one unsafe register is not dead. We do not want to outline at
10106 // this point. If it is long enough to outline from and does not cross a
10107 // bundle boundary, save the range [RangeBegin, RangeEnd).
10108 if (RangeLen <= 1)
10109 return;
10110 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10111 return;
10112 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10113 return;
10114 Ranges.emplace_back(RangeBegin, RangeEnd);
10115 };
10116 // Find the first point where all unsafe registers are dead.
10117 // FIND: <safe instr> <-- end of first potential range
10118 // SKIP: <unsafe def>
10119 // SKIP: ... everything between ...
10120 // SKIP: <unsafe use>
10121 auto FirstPossibleEndPt = MBB.instr_rbegin();
10122 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10123 LRU.stepBackward(*FirstPossibleEndPt);
10124 // Update flags that impact how we outline across the entire block,
10125 // regardless of safety.
10126 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10127 if (AreAllUnsafeRegsDead())
10128 break;
10129 }
10130 // If we exhausted the entire block, we have no safe ranges to outline.
10131 if (FirstPossibleEndPt == MBB.instr_rend())
10132 return Ranges;
10133 // Current range.
10134 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10135 // StartPt points to the first place where all unsafe registers
10136 // are dead (if there is any such point). Begin partitioning the MBB into
10137 // ranges.
10138 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10139 LRU.stepBackward(MI);
10140 UpdateWholeMBBFlags(MI);
10141 if (!AreAllUnsafeRegsDead()) {
10142 SaveRangeIfNonEmpty();
10143 CreateNewRangeStartingAt(MI.getIterator());
10144 continue;
10145 }
10146 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10147 RangeBegin = MI.getIterator();
10148 ++RangeLen;
10149 }
10150 // Above loop misses the last (or only) range. If we are still safe, then
10151 // let's save the range.
10152 if (AreAllUnsafeRegsDead())
10153 SaveRangeIfNonEmpty();
10154 if (Ranges.empty())
10155 return Ranges;
10156 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
10157 // the order.
10158 std::reverse(Ranges.begin(), Ranges.end());
10159 // If there is at least one outlinable range where LR is unavailable
10160 // somewhere, remember that.
10161 if (!LRAvailableEverywhere)
10162 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
10163 return Ranges;
10164}
10165
10166 outliner::InstrType
10167 AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10168 MachineBasicBlock::iterator &MIT,
10169 unsigned Flags) const {
10170 MachineInstr &MI = *MIT;
10171
10172 // Don't outline anything used for return address signing. The outlined
10173 // function will get signed later if needed
10174 switch (MI.getOpcode()) {
10175 case AArch64::PACM:
10176 case AArch64::PACIASP:
10177 case AArch64::PACIBSP:
10178 case AArch64::PACIASPPC:
10179 case AArch64::PACIBSPPC:
10180 case AArch64::AUTIASP:
10181 case AArch64::AUTIBSP:
10182 case AArch64::AUTIASPPCi:
10183 case AArch64::AUTIASPPCr:
10184 case AArch64::AUTIBSPPCi:
10185 case AArch64::AUTIBSPPCr:
10186 case AArch64::RETAA:
10187 case AArch64::RETAB:
10188 case AArch64::RETAASPPCi:
10189 case AArch64::RETAASPPCr:
10190 case AArch64::RETABSPPCi:
10191 case AArch64::RETABSPPCr:
10192 case AArch64::EMITBKEY:
10193 case AArch64::PAUTH_PROLOGUE:
10194 case AArch64::PAUTH_EPILOGUE:
10195 return outliner::InstrType::Illegal;
10196 }
10197
10198 // We can only outline these if we will tail call the outlined function, or
10199 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10200 // in a tail call.
10201 //
10202 // FIXME: If the proper fixups for the offset are implemented, this should be
10203 // possible.
10204 if (MI.isCFIInstruction())
10205 return outliner::InstrType::Legal;
10206
10207 // Is this a terminator for a basic block?
10208 if (MI.isTerminator())
10209 // TargetInstrInfo::getOutliningType has already filtered out anything
10210 // that would break this, so we can allow it here.
10211 return outliner::InstrType::Legal;
10212
10213 // Make sure none of the operands are un-outlinable.
10214 for (const MachineOperand &MOP : MI.operands()) {
10215 // A check preventing CFI indices was here before, but only CFI
10216 // instructions should have those.
10217 assert(!MOP.isCFIIndex());
10218
10219 // If it uses LR or W30 explicitly, then don't touch it.
10220 if (MOP.isReg() && !MOP.isImplicit() &&
10221 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10222 return outliner::InstrType::Illegal;
10223 }
10224
10225 // Special cases for instructions that can always be outlined, but will fail
10227 // the later tests, e.g., ADRPs, which are PC-relative, use LR, but can always
10227 // be outlined because they don't require a *specific* value to be in LR.
10228 if (MI.getOpcode() == AArch64::ADRP)
10229 return outliner::InstrType::Legal;
10230
10231 // If MI is a call we might be able to outline it. We don't want to outline
10232 // any calls that rely on the position of items on the stack. When we outline
10233 // something containing a call, we have to emit a save and restore of LR in
10234 // the outlined function. Currently, this always happens by saving LR to the
10235 // stack. Thus, if we outline, say, half the parameters for a function call
10236 // plus the call, then we'll break the callee's expectations for the layout
10237 // of the stack.
10238 //
10239 // FIXME: Allow calls to functions which construct a stack frame, as long
10240 // as they don't access arguments on the stack.
10241 // FIXME: Figure out some way to analyze functions defined in other modules.
10242 // We should be able to compute the memory usage based on the IR calling
10243 // convention, even if we can't see the definition.
10244 if (MI.isCall()) {
10245 // Get the function associated with the call. Look at each operand and find
10246 // the one that represents the callee and get its name.
10247 const Function *Callee = nullptr;
10248 for (const MachineOperand &MOP : MI.operands()) {
10249 if (MOP.isGlobal()) {
10250 Callee = dyn_cast<Function>(MOP.getGlobal());
10251 break;
10252 }
10253 }
10254
10255 // Never outline calls to mcount. There isn't any rule that would require
10256 // this, but the Linux kernel's "ftrace" feature depends on it.
10257 if (Callee && Callee->getName() == "\01_mcount")
10258 return outliner::InstrType::Illegal;
10259
10260 // If we don't know anything about the callee, assume it depends on the
10261 // stack layout of the caller. In that case, it's only legal to outline
10262 // as a tail-call. Explicitly list the call instructions we know about so we
10263 // don't get unexpected results with call pseudo-instructions.
10264 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10265 if (MI.getOpcode() == AArch64::BLR ||
10266 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10267 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10268
10269 if (!Callee)
10270 return UnknownCallOutlineType;
10271
10272 // We have a function we have information about. Check if it's something we
10273 // can safely outline.
10274 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10275
10276 // We don't know what's going on with the callee at all. Don't touch it.
10277 if (!CalleeMF)
10278 return UnknownCallOutlineType;
10279
10280 // Check if we know anything about the callee saves on the function. If we
10281 // don't, then don't touch it, since that implies that we haven't
10282 // computed anything about its stack frame yet.
10283 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10284 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10285 MFI.getNumObjects() > 0)
10286 return UnknownCallOutlineType;
10287
10288 // At this point, we can say that CalleeMF ought to not pass anything on the
10289 // stack. Therefore, we can outline it.
10290 return outliner::InstrType::Legal;
10291 }
10292
10293 // Don't touch the link register or W30.
10294 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10295 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10296 return outliner::InstrType::Illegal;
10297
10298 // Don't outline BTI instructions, because that will prevent the outlining
10299 // site from being indirectly callable.
10300 if (hasBTISemantics(MI))
10301 return outliner::InstrType::Illegal;
10302
10303 return outliner::InstrType::Legal;
10304}
10305
10306void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10307 for (MachineInstr &MI : MBB) {
10308 const MachineOperand *Base;
10309 TypeSize Width(0, false);
10310 int64_t Offset;
10311 bool OffsetIsScalable;
10312
10313 // Is this a load or store with an immediate offset with SP as the base?
10314 if (!MI.mayLoadOrStore() ||
10315 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10316 &RI) ||
10317 (Base->isReg() && Base->getReg() != AArch64::SP))
10318 continue;
10319
10320 // It is, so we have to fix it up.
10321 TypeSize Scale(0U, false);
10322 int64_t Dummy1, Dummy2;
10323
10324 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10325 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10326 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10327 assert(Scale != 0 && "Unexpected opcode!");
10328 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10329
10330 // We've pushed the return address to the stack, so add 16 to the offset.
10331 // This is safe, since we already checked if it would overflow when we
10332 // checked if this instruction was legal to outline.
10333 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10334 StackOffsetOperand.setImm(NewImm);
10335 }
10336}
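// Worked example (annotation, not original source): for "ldr x0, [sp, #8]"
// the LDRXui scale is 8, so the stored immediate is 1; adding the 16 bytes
// used to spill LR gives (8 + 16) / 8 = 3, i.e. the instruction is rewritten
// to "ldr x0, [sp, #24]".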
10337
10338 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
10339 const AArch64InstrInfo *TII,
10340 bool ShouldSignReturnAddr) {
10341 if (!ShouldSignReturnAddr)
10342 return;
10343
10344 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10345 .setMIFlag(MachineInstr::FrameSetup);
10346 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10347 TII->get(AArch64::PAUTH_EPILOGUE))
10348 .setMIFlag(MachineInstr::FrameDestroy);
10349}
10350
10351void AArch64InstrInfo::buildOutlinedFrame(
10352 MachineBasicBlock &MBB, MachineFunction &MF,
10353 const outliner::OutlinedFunction &OF) const {
10354
10355 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10356
10357 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10358 FI->setOutliningStyle("Tail Call");
10359 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10360 // For thunk outlining, rewrite the last instruction from a call to a
10361 // tail-call.
10362 MachineInstr *Call = &*--MBB.instr_end();
10363 unsigned TailOpcode;
10364 if (Call->getOpcode() == AArch64::BL) {
10365 TailOpcode = AArch64::TCRETURNdi;
10366 } else {
10367 assert(Call->getOpcode() == AArch64::BLR ||
10368 Call->getOpcode() == AArch64::BLRNoIP);
10369 TailOpcode = AArch64::TCRETURNriALL;
10370 }
10371 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10372 .add(Call->getOperand(0))
10373 .addImm(0);
10374 MBB.insert(MBB.end(), TC);
10375 Call->eraseFromParent();
10376
10377 FI->setOutliningStyle("Thunk");
10378 }
10379
10380 bool IsLeafFunction = true;
10381
10382 // Is there a call in the outlined range?
10383 auto IsNonTailCall = [](const MachineInstr &MI) {
10384 return MI.isCall() && !MI.isReturn();
10385 };
10386
10387 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10388 // Fix up the instructions in the range, since we're going to modify the
10389 // stack.
10390
10391 // Bugzilla ID: 46767
10392 // TODO: Check if fixing up twice is safe so we can outline these.
10393 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10394 "Can only fix up stack references once");
10395 fixupPostOutline(MBB);
10396
10397 IsLeafFunction = false;
10398
10399 // LR has to be a live in so that we can save it.
10400 if (!MBB.isLiveIn(AArch64::LR))
10401 MBB.addLiveIn(AArch64::LR);
10402
10403 MachineBasicBlock::iterator It = MBB.begin();
10404 MachineBasicBlock::iterator Et = MBB.end();
10405
10406 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10407 OF.FrameConstructionID == MachineOutlinerThunk)
10408 Et = std::prev(MBB.end());
10409
10410 // Insert a save before the outlined region
10411 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10412 .addReg(AArch64::SP, RegState::Define)
10413 .addReg(AArch64::LR)
10414 .addReg(AArch64::SP)
10415 .addImm(-16);
10416 It = MBB.insert(It, STRXpre);
10417
10418 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10419 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10420
10421 // Add a CFI saying the stack was moved 16 B down.
10422 CFIBuilder.buildDefCFAOffset(16);
10423
10424 // Add a CFI saying that the LR that we want to find is now 16 B higher
10425 // than before.
10426 CFIBuilder.buildOffset(AArch64::LR, -16);
10427 }
10428
10429 // Insert a restore before the terminator for the function.
10430 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10431 .addReg(AArch64::SP, RegState::Define)
10432 .addReg(AArch64::LR, RegState::Define)
10433 .addReg(AArch64::SP)
10434 .addImm(16);
10435 Et = MBB.insert(Et, LDRXpost);
10436 }
10437
10438 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
10439
10440 // If this is a tail call outlined function, then there's already a return.
10441 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10442 OF.FrameConstructionID == MachineOutlinerThunk) {
10443 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10444 return;
10445 }
10446
10447 // It's not a tail call, so we have to insert the return ourselves.
10448
10449 // LR has to be a live in so that we can return to it.
10450 if (!MBB.isLiveIn(AArch64::LR))
10451 MBB.addLiveIn(AArch64::LR);
10452
10453 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10454 .addReg(AArch64::LR);
10455 MBB.insert(MBB.end(), ret);
10456
10457 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10458
10459 FI->setOutliningStyle("Function");
10460
10461 // Did we have to modify the stack by saving the link register?
10462 if (OF.FrameConstructionID != MachineOutlinerDefault)
10463 return;
10464
10465 // We modified the stack.
10466 // Walk over the basic block and fix up all the stack accesses.
10467 fixupPostOutline(MBB);
10468}
10469
10470MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10471 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10472 MachineFunction &MF, outliner::Candidate &C) const {
10473
10474 // Are we tail calling?
10475 if (C.CallConstructionID == MachineOutlinerTailCall) {
10476 // If yes, then we can just branch to the label.
10477 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10478 .addGlobalAddress(M.getNamedValue(MF.getName()))
10479 .addImm(0));
10480 return It;
10481 }
10482
10483 // Are we saving the link register?
10484 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10485 C.CallConstructionID == MachineOutlinerThunk) {
10486 // No, so just insert the call.
10487 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10488 .addGlobalAddress(M.getNamedValue(MF.getName())));
10489 return It;
10490 }
10491
10492 // We want to return the spot where we inserted the call.
10493 MachineBasicBlock::iterator CallPt;
10494
10495 // Instructions for saving and restoring LR around the call instruction we're
10496 // going to insert.
10497 MachineInstr *Save;
10498 MachineInstr *Restore;
10499 // Can we save to a register?
10500 if (C.CallConstructionID == MachineOutlinerRegSave) {
10501 // FIXME: This logic should be sunk into a target-specific interface so that
10502 // we don't have to recompute the register.
10503 Register Reg = findRegisterToSaveLRTo(C);
10504 assert(Reg && "No callee-saved register available?");
10505
10506 // LR has to be a live in so that we can save it.
10507 if (!MBB.isLiveIn(AArch64::LR))
10508 MBB.addLiveIn(AArch64::LR);
10509
10510 // Save and restore LR from Reg.
10511 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10512 .addReg(AArch64::XZR)
10513 .addReg(AArch64::LR)
10514 .addImm(0);
10515 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10516 .addReg(AArch64::XZR)
10517 .addReg(Reg)
10518 .addImm(0);
10519 } else {
10520 // We have the default case. Save and restore from SP.
10521 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10522 .addReg(AArch64::SP, RegState::Define)
10523 .addReg(AArch64::LR)
10524 .addReg(AArch64::SP)
10525 .addImm(-16);
10526 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10527 .addReg(AArch64::SP, RegState::Define)
10528 .addReg(AArch64::LR, RegState::Define)
10529 .addReg(AArch64::SP)
10530 .addImm(16);
10531 }
10532
10533 It = MBB.insert(It, Save);
10534 It++;
10535
10536 // Insert the call.
10537 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10538 .addGlobalAddress(M.getNamedValue(MF.getName())));
10539 CallPt = It;
10540 It++;
10541
10542 It = MBB.insert(It, Restore);
10543 return CallPt;
10544}
10545
10546bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10547 MachineFunction &MF) const {
10548 return MF.getFunction().hasMinSize();
10549}
10550
10551void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10552 MachineBasicBlock::iterator Iter,
10553 DebugLoc &DL,
10554 bool AllowSideEffects) const {
10555 const MachineFunction &MF = *MBB.getParent();
10556 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10557 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10558
10559 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10560 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10561 } else if (STI.isSVEorStreamingSVEAvailable()) {
10562 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10563 .addImm(0)
10564 .addImm(0);
10565 } else if (STI.isNeonAvailable()) {
10566 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10567 .addImm(0);
10568 } else {
10569 // This is a streaming-compatible function without SVE. We don't have full
10570 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10571 // Since `movi v..` would be illegal here, use `fmov d..` instead.
10572 assert(STI.hasNEON() && "Expected to have NEON.");
10573 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10574 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10575 }
10576}
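// A minimal usage sketch (hypothetical caller; the names Subtarget, MBB and DL
// are assumptions for illustration): a pass that scrubs temporaries before
// returning could do roughly
//   const AArch64InstrInfo *TII = Subtarget.getInstrInfo();
//   DebugLoc DL;
//   for (Register R : {AArch64::X8, AArch64::X9})
//     TII->buildClearRegister(R, MBB, MBB.getFirstTerminator(), DL,
//                             /*AllowSideEffects=*/true);
// and each register is then zeroed with the cheapest idiom the subtarget
// allows (MOVZXi for GPRs, DUP_ZI_D / MOVIv2d_ns / the D-sub-register fallback
// for vector registers).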
10577
10578std::optional<DestSourcePair>
10579AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10580
10581 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
10582 // and zero immediate operands used as an alias for mov instruction.
10583 if (((MI.getOpcode() == AArch64::ORRWrs &&
10584 MI.getOperand(1).getReg() == AArch64::WZR &&
10585 MI.getOperand(3).getImm() == 0x0) ||
10586 (MI.getOpcode() == AArch64::ORRWrr &&
10587 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10588 // Check that the w->w move is not a zero-extending w->x mov.
10589 (!MI.getOperand(0).getReg().isVirtual() ||
10590 MI.getOperand(0).getSubReg() == 0) &&
10591 (!MI.getOperand(0).getReg().isPhysical() ||
10592 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10593 /*TRI=*/nullptr) == -1))
10594 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10595
10596 if (MI.getOpcode() == AArch64::ORRXrs &&
10597 MI.getOperand(1).getReg() == AArch64::XZR &&
10598 MI.getOperand(3).getImm() == 0x0)
10599 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10600
10601 return std::nullopt;
10602}
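// Worked example (illustrative): "ORRXrs x0, xzr, x1, lsl #0", i.e. the
// "mov x0, x1" alias, is reported with Destination = x0 and Source = x1.
// An ORRWrs copy that also implicitly defines the corresponding X register
// (a zero-extending w->x move) is rejected by the checks above.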
10603
10604std::optional<DestSourcePair>
10605AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10606 if ((MI.getOpcode() == AArch64::ORRWrs &&
10607 MI.getOperand(1).getReg() == AArch64::WZR &&
10608 MI.getOperand(3).getImm() == 0x0) ||
10609 (MI.getOpcode() == AArch64::ORRWrr &&
10610 MI.getOperand(1).getReg() == AArch64::WZR))
10611 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10612 return std::nullopt;
10613}
10614
10615std::optional<RegImmPair>
10616AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10617 int Sign = 1;
10618 int64_t Offset = 0;
10619
10620 // TODO: Handle cases where Reg is a super- or sub-register of the
10621 // destination register.
10622 const MachineOperand &Op0 = MI.getOperand(0);
10623 if (!Op0.isReg() || Reg != Op0.getReg())
10624 return std::nullopt;
10625
10626 switch (MI.getOpcode()) {
10627 default:
10628 return std::nullopt;
10629 case AArch64::SUBWri:
10630 case AArch64::SUBXri:
10631 case AArch64::SUBSWri:
10632 case AArch64::SUBSXri:
10633 Sign *= -1;
10634 [[fallthrough]];
10635 case AArch64::ADDSWri:
10636 case AArch64::ADDSXri:
10637 case AArch64::ADDWri:
10638 case AArch64::ADDXri: {
10639 // TODO: Third operand can be global address (usually some string).
10640 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10641 !MI.getOperand(2).isImm())
10642 return std::nullopt;
10643 int Shift = MI.getOperand(3).getImm();
10644 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10645 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10646 }
10647 }
10648 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10649}
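// Worked example (illustrative): for "ADDXri x0, x1, #4, lsl #12" the operands
// are (x0, x1, imm = 4, shift = 12), so querying x0 yields {x1, 4 << 12} =
// {x1, 16384}; for "SUBXri x0, x1, #16" the sign flips and the result is
// {x1, -16}.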
10650
10651/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10652/// the destination register then, if possible, describe the value in terms of
10653/// the source register.
10654static std::optional<ParamLoadedValue>
10655describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10656 const TargetInstrInfo *TII,
10657 const TargetRegisterInfo *TRI) {
10658 auto DestSrc = TII->isCopyLikeInstr(MI);
10659 if (!DestSrc)
10660 return std::nullopt;
10661
10662 Register DestReg = DestSrc->Destination->getReg();
10663 Register SrcReg = DestSrc->Source->getReg();
10664
10665 if (!DestReg.isValid() || !SrcReg.isValid())
10666 return std::nullopt;
10667
10668 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10669
10670 // If the described register is the destination, just return the source.
10671 if (DestReg == DescribedReg)
10672 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10673
10674 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10675 if (MI.getOpcode() == AArch64::ORRWrs &&
10676 TRI->isSuperRegister(DestReg, DescribedReg))
10677 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10678
10679 // We may need to describe the lower part of a ORRXrs move.
10680 if (MI.getOpcode() == AArch64::ORRXrs &&
10681 TRI->isSubRegister(DestReg, DescribedReg)) {
10682 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10683 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10684 }
10685
10686 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10687 "Unhandled ORR[XW]rs copy case");
10688
10689 return std::nullopt;
10690}
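// Worked example (illustrative): "ORRWrs w1, wzr, w0, lsl #0" (the
// "mov w1, w0" alias) is a copy, so describing w1 yields w0 directly, and
// describing x1 also yields w0 because the 32-bit write zero-extends into the
// full 64-bit register; for an ORRXrs copy, describing the low 32 bits of the
// destination yields the corresponding 32-bit sub-register of the source.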
10691
10692bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10693 // Functions cannot be split to different sections on AArch64 if they have
10694 // a red zone. This is because relaxing a cross-section branch may require
10695 // incrementing the stack pointer to spill a register, which would overwrite
10696 // the red zone.
10697 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10698 return false;
10699
10700 return TargetInstrInfo::isFunctionSafeToSplit(MF);
10701}
10702
10703bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10704 const MachineBasicBlock &MBB) const {
10705 // Asm Goto blocks can contain conditional branches to goto labels, which can
10706 // get moved out of range of the branch instruction.
10707 auto isAsmGoto = [](const MachineInstr &MI) {
10708 return MI.getOpcode() == AArch64::INLINEASM_BR;
10709 };
10710 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10711 return false;
10712
10713 // Because jump tables are label-relative instead of table-relative, they all
10714 // must be in the same section or relocation fixup handling will fail.
10715
10716 // Check if MBB is a jump table target
10717 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10718 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10719 return llvm::is_contained(JTE.MBBs, &MBB);
10720 };
10721 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10722 return false;
10723
10724 // Check if MBB contains a jump table lookup
10725 for (const MachineInstr &MI : MBB) {
10726 switch (MI.getOpcode()) {
10727 case TargetOpcode::G_BRJT:
10728 case AArch64::JumpTableDest32:
10729 case AArch64::JumpTableDest16:
10730 case AArch64::JumpTableDest8:
10731 return false;
10732 default:
10733 continue;
10734 }
10735 }
10736
10737 // MBB isn't a special case, so it's safe to be split to the cold section.
10738 return true;
10739}
10740
10741std::optional<ParamLoadedValue>
10742AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10743 Register Reg) const {
10744 const MachineFunction *MF = MI.getMF();
10745 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10746 switch (MI.getOpcode()) {
10747 case AArch64::MOVZWi:
10748 case AArch64::MOVZXi: {
10749 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10750 // 64-bit parameters, so we need to consider super-registers.
10751 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10752 return std::nullopt;
10753
10754 if (!MI.getOperand(1).isImm())
10755 return std::nullopt;
10756 int64_t Immediate = MI.getOperand(1).getImm();
10757 int Shift = MI.getOperand(2).getImm();
10758 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
10759 nullptr);
10760 }
10761 case AArch64::ORRWrs:
10762 case AArch64::ORRXrs:
10763 return describeORRLoadedValue(MI, Reg, this, TRI);
10764 }
10765
10766 return TargetInstrInfo::describeLoadedValue(MI, Reg);
10767}
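// Worked example (illustrative): "MOVZWi w0, #42, lsl #0" describes w0 (and,
// via the super-register check, x0) as the constant 42, while
// "MOVZXi x0, #1, lsl #16" describes x0 as 1 << 16 = 65536.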
10768
10769bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10770 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10771 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10772 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10773 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10774
10775 // Anyexts are nops.
10776 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10777 return true;
10778
10779 Register DefReg = ExtMI.getOperand(0).getReg();
10780 if (!MRI.hasOneNonDBGUse(DefReg))
10781 return false;
10782
10783 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10784 // addressing mode.
10785 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
10786 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10787}
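// Illustrative GlobalISel sketch (assumed MIR, not taken from a real test):
//   %off:_(s64) = G_SEXT %idx:_(s32)
//   %addr:_(p0) = G_PTR_ADD %base, %off
//   %val:_(s32) = G_LOAD %addr
// Here the sign-extend feeds a G_PTR_ADD offset with a single use, so it is
// likely to fold into an "ldr w0, [x0, w1, sxtw]"-style addressing mode and is
// reported as cheap by this hook.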
10788
10789uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10790 return get(Opc).TSFlags & AArch64::ElementSizeMask;
10791}
10792
10793bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10794 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10795}
10796
10797bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10798 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
10799}
10800
10801unsigned int
10802AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10803 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10804}
10805
10806bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10807 unsigned Scale) const {
10808 if (Offset && Scale)
10809 return false;
10810
10811 // Check Reg + Imm
10812 if (!Scale) {
10813 // 9-bit signed offset
10814 if (isInt<9>(Offset))
10815 return true;
10816
10817 // 12-bit unsigned offset
10818 unsigned Shift = Log2_64(NumBytes);
10819 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10820 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10821 (Offset >> Shift) << Shift == Offset)
10822 return true;
10823 return false;
10824 }
10825
10826 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10827 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10828}
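// Worked examples (illustrative): for an 8-byte access, Offset = -256 with
// Scale = 0 is accepted via the signed 9-bit form; Offset = 32760 is accepted
// via the unsigned scaled form (32760 / 8 = 4095 <= 4095 and it is a multiple
// of 8); Offset = 32768 is rejected; and Scale = 8 with Offset = 0 is accepted
// as the reg + reg, lsl #3 form.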
10829
10830unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10831 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10832 return AArch64::BLRNoIP;
10833 else
10834 return AArch64::BLR;
10835}
10836
10837MachineBasicBlock::iterator
10838AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10839 Register TargetReg, bool FrameSetup) const {
10840 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10841
10842 MachineBasicBlock &MBB = *MBBI->getParent();
10843 MachineFunction &MF = *MBB.getParent();
10844 const AArch64InstrInfo *TII =
10845 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10846 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10847 DebugLoc DL = MBB.findDebugLoc(MBBI);
10848
10849 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10850 MachineBasicBlock *LoopTestMBB =
10851 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10852 MF.insert(MBBInsertPoint, LoopTestMBB);
10853 MachineBasicBlock *LoopBodyMBB =
10854 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10855 MF.insert(MBBInsertPoint, LoopBodyMBB);
10856 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10857 MF.insert(MBBInsertPoint, ExitMBB);
10858 MachineInstr::MIFlag Flags =
10860
10861 // LoopTest:
10862 // SUB SP, SP, #ProbeSize
10863 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10864 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10865
10866 // CMP SP, TargetReg
10867 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10868 AArch64::XZR)
10869 .addReg(AArch64::SP)
10870 .addReg(TargetReg)
10871 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
10872 .setMIFlags(Flags);
10873
10874 // B.<Cond> LoopExit
10875 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10876 .addImm(AArch64CC::LE)
10877 .addMBB(ExitMBB)
10878 .setMIFlags(Flags);
10879
10880 // STR XZR, [SP]
10881 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10882 .addReg(AArch64::XZR)
10883 .addReg(AArch64::SP)
10884 .addImm(0)
10885 .setMIFlags(Flags);
10886
10887 // B loop
10888 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10889 .addMBB(LoopTestMBB)
10890 .setMIFlags(Flags);
10891
10892 // LoopExit:
10893 // MOV SP, TargetReg
10894 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10895 .addReg(TargetReg)
10896 .addImm(0)
10897 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
10898 .setMIFlags(Flags);
10899
10900 // LDR XZR, [SP]
10901 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10902 .addReg(AArch64::XZR, RegState::Define)
10903 .addReg(AArch64::SP)
10904 .addImm(0)
10905 .setMIFlags(Flags);
10906
10907 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10908 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
10909
10910 LoopTestMBB->addSuccessor(ExitMBB);
10911 LoopTestMBB->addSuccessor(LoopBodyMBB);
10912 LoopBodyMBB->addSuccessor(LoopTestMBB);
10913 MBB.addSuccessor(LoopTestMBB);
10914
10915 // Update liveins.
10916 if (MF.getRegInfo().reservedRegsFrozen())
10917 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10918
10919 return ExitMBB->begin();
10920}
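// Taken together, for ProbeSize == 4096 the blocks built above correspond
// roughly to (illustrative assembly, modulo the exact condition code used
// above):
//   LoopTest: sub  sp, sp, #4096
//             cmp  sp, <TargetReg>
//             b.le LoopExit
//   LoopBody: str  xzr, [sp]
//             b    LoopTest
//   LoopExit: mov  sp, <TargetReg>
//             ldr  xzr, [sp]
// so every page between the old and the new SP is touched before SP is moved.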
10921
10922namespace {
10923class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10924 MachineFunction *MF;
10925 const TargetInstrInfo *TII;
10926 const TargetRegisterInfo *TRI;
10927 MachineRegisterInfo &MRI;
10928
10929 /// The block of the loop
10930 MachineBasicBlock *LoopBB;
10931 /// The conditional branch of the loop
10932 MachineInstr *CondBranch;
10933 /// The compare instruction for loop control
10934 MachineInstr *Comp;
10935 /// The operand index of the loop counter value in Comp
10936 unsigned CompCounterOprNum;
10937 /// The instruction that updates the loop counter value
10938 MachineInstr *Update;
10939 /// The operand index of the loop counter value in Update
10940 unsigned UpdateCounterOprNum;
10941 /// The initial value of the loop counter
10942 Register Init;
10943 /// True iff Update is a predecessor of Comp
10944 bool IsUpdatePriorComp;
10945
10946 /// The normalized condition used by createTripCountGreaterCondition()
10947 SmallVector<MachineOperand, 4> Cond;
10948
10949public:
10950 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10951 MachineInstr *Comp, unsigned CompCounterOprNum,
10952 MachineInstr *Update, unsigned UpdateCounterOprNum,
10953 Register Init, bool IsUpdatePriorComp,
10954 const SmallVectorImpl<MachineOperand> &Cond)
10955 : MF(Comp->getParent()->getParent()),
10956 TII(MF->getSubtarget().getInstrInfo()),
10957 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10958 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10959 CompCounterOprNum(CompCounterOprNum), Update(Update),
10960 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10961 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10962
10963 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10964 // Make the instructions for loop control be placed in stage 0.
10965 // The predecessors of Comp are considered by the caller.
10966 return MI == Comp;
10967 }
10968
10969 std::optional<bool> createTripCountGreaterCondition(
10970 int TC, MachineBasicBlock &MBB,
10971 SmallVectorImpl<MachineOperand> &CondParam) override {
10972 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10973 // Cond is normalized for such use.
10974 // The predecessors of the branch are assumed to have already been inserted.
10975 CondParam = Cond;
10976 return {};
10977 }
10978
10979 void createRemainingIterationsGreaterCondition(
10980 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10981 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10982
10983 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10984
10985 void adjustTripCount(int TripCountAdjust) override {}
10986
10987 bool isMVEExpanderSupported() override { return true; }
10988};
10989} // namespace
10990
10991/// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
10992/// is replaced by ReplaceReg. The output register is newly created.
10993/// The other operands are unchanged from MI.
10994static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10995 Register ReplaceReg, MachineBasicBlock &MBB,
10996 MachineBasicBlock::iterator InsertTo) {
10997 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10998 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10999 const TargetRegisterInfo *TRI =
11000 MBB.getParent()->getSubtarget().getRegisterInfo();
11001 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
11002 Register Result = 0;
11003 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
11004 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
11005 Result = MRI.createVirtualRegister(
11006 MRI.getRegClass(NewMI->getOperand(0).getReg()));
11007 NewMI->getOperand(I).setReg(Result);
11008 } else if (I == ReplaceOprNum) {
11009 MRI.constrainRegClass(ReplaceReg,
11010 TII->getRegClass(NewMI->getDesc(), I, TRI));
11011 NewMI->getOperand(I).setReg(ReplaceReg);
11012 }
11013 }
11014 MBB.insert(InsertTo, NewMI);
11015 return Result;
11016}
11017
11018void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11019 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11020 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
11021 // Create and accumulate conditions for the next TC iterations.
11022 // Example:
11023 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11024 // # iteration of the kernel
11025 //
11026 // # insert the following instructions
11027 // cond = CSINCXr 0, 0, C, implicit $nzcv
11028 // counter = ADDXri counter, 1 # clone from this->Update
11029 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11030 // cond = CSINCXr cond, cond, C, implicit $nzcv
11031 // ... (repeat TC times)
11032 // SUBSXri cond, 0, implicit-def $nzcv
11033
11034 assert(CondBranch->getOpcode() == AArch64::Bcc);
11035 // CondCode to exit the loop
11036 AArch64CC::CondCode CC =
11037 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11038 if (CondBranch->getOperand(1).getMBB() == LoopBB)
11039 CC = AArch64CC::getInvertedCondCode(CC);
11040
11041 // Accumulate conditions to exit the loop
11042 Register AccCond = AArch64::XZR;
11043
11044 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11045 auto AccumulateCond = [&](Register CurCond,
11046 AArch64CC::CondCode CC) {
11047 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11048 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11049 .addReg(NewCond, RegState::Define)
11050 .addReg(CurCond)
11051 .addReg(CurCond)
11052 .addImm(AArch64CC::getInvertedCondCode(CC));
11053 return NewCond;
11054 };
11055
11056 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11057 // Update and Comp for I == 0 already exist in MBB
11058 // (MBB is an unrolled kernel)
11059 Register Counter;
11060 for (int I = 0; I <= TC; ++I) {
11061 Register NextCounter;
11062 if (I != 0)
11063 NextCounter =
11064 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11065
11066 AccCond = AccumulateCond(AccCond, CC);
11067
11068 if (I != TC) {
11069 if (I == 0) {
11070 if (Update != Comp && IsUpdatePriorComp) {
11071 Counter =
11072 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11073 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11074 MBB.end());
11075 } else {
11076 // Can use the already-calculated value.
11077 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11078 }
11079 } else if (Update != Comp) {
11080 NextCounter =
11081 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11082 }
11083 }
11084 Counter = NextCounter;
11085 }
11086 } else {
11087 Register Counter;
11088 if (LastStage0Insts.empty()) {
11089 // Use the initial counter value (testing whether the trip count is large
11090 // enough for the pipelined code to be executed)
11091 Counter = Init;
11092 if (IsUpdatePriorComp)
11093 Counter =
11094 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11095 } else {
11096 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11097 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11098 }
11099
11100 for (int I = 0; I <= TC; ++I) {
11101 Register NextCounter;
11102 NextCounter =
11103 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11104 AccCond = AccumulateCond(AccCond, CC);
11105 if (I != TC && Update != Comp)
11106 NextCounter =
11107 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11108 Counter = NextCounter;
11109 }
11110 }
11111
11112 // If AccCond == 0, the remainder is greater than TC.
11113 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11114 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11115 .addReg(AccCond)
11116 .addImm(0)
11117 .addImm(0);
11118 Cond.clear();
11119 Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11120}
11121
11122static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11123 Register &RegMBB, Register &RegOther) {
11124 assert(Phi.getNumOperands() == 5);
11125 if (Phi.getOperand(2).getMBB() == MBB) {
11126 RegMBB = Phi.getOperand(1).getReg();
11127 RegOther = Phi.getOperand(3).getReg();
11128 } else {
11129 assert(Phi.getOperand(4).getMBB() == MBB);
11130 RegMBB = Phi.getOperand(3).getReg();
11131 RegOther = Phi.getOperand(1).getReg();
11132 }
11133}
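// Worked example (illustrative): for
//   %reg0:gpr64 = PHI %init, %bb.preheader, %reg1, %bb.loop
// called with MBB == %bb.loop, operand 4 names %bb.loop, so RegMBB is set to
// %reg1 (the value coming around the loop) and RegOther to %init (the value
// coming from the preheader).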
11134
11135static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11136 if (!Reg.isVirtual())
11137 return false;
11138 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11139 return MRI.getVRegDef(Reg)->getParent() != BB;
11140}
11141
11142/// If Reg is an induction variable, return true and set the output parameters
11143static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11144 MachineInstr *&UpdateInst,
11145 unsigned &UpdateCounterOprNum, Register &InitReg,
11146 bool &IsUpdatePriorComp) {
11147 // Example:
11148 //
11149 // Preheader:
11150 // InitReg = ...
11151 // LoopBB:
11152 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11153 // Reg = COPY Reg0 ; COPY is ignored.
11154 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11155 // ; Reg is the value calculated in the previous
11156 // ; iteration, so IsUpdatePriorComp == false.
11157
11158 if (LoopBB->pred_size() != 2)
11159 return false;
11160 if (!Reg.isVirtual())
11161 return false;
11162 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11163 UpdateInst = nullptr;
11164 UpdateCounterOprNum = 0;
11165 InitReg = 0;
11166 IsUpdatePriorComp = true;
11167 Register CurReg = Reg;
11168 while (true) {
11169 MachineInstr *Def = MRI.getVRegDef(CurReg);
11170 if (Def->getParent() != LoopBB)
11171 return false;
11172 if (Def->isCopy()) {
11173 // Ignore copy instructions unless they contain subregisters
11174 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11175 return false;
11176 CurReg = Def->getOperand(1).getReg();
11177 } else if (Def->isPHI()) {
11178 if (InitReg != 0)
11179 return false;
11180 if (!UpdateInst)
11181 IsUpdatePriorComp = false;
11182 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11183 } else {
11184 if (UpdateInst)
11185 return false;
11186 switch (Def->getOpcode()) {
11187 case AArch64::ADDSXri:
11188 case AArch64::ADDSWri:
11189 case AArch64::SUBSXri:
11190 case AArch64::SUBSWri:
11191 case AArch64::ADDXri:
11192 case AArch64::ADDWri:
11193 case AArch64::SUBXri:
11194 case AArch64::SUBWri:
11195 UpdateInst = Def;
11196 UpdateCounterOprNum = 1;
11197 break;
11198 case AArch64::ADDSXrr:
11199 case AArch64::ADDSWrr:
11200 case AArch64::SUBSXrr:
11201 case AArch64::SUBSWrr:
11202 case AArch64::ADDXrr:
11203 case AArch64::ADDWrr:
11204 case AArch64::SUBXrr:
11205 case AArch64::SUBWrr:
11206 UpdateInst = Def;
11207 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11208 UpdateCounterOprNum = 1;
11209 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11210 UpdateCounterOprNum = 2;
11211 else
11212 return false;
11213 break;
11214 default:
11215 return false;
11216 }
11217 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11218 }
11219
11220 if (!CurReg.isVirtual())
11221 return false;
11222 if (Reg == CurReg)
11223 break;
11224 }
11225
11226 if (!UpdateInst)
11227 return false;
11228
11229 return true;
11230}
11231
11232std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11233AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11234 // Accept loops that meet the following conditions
11235 // * The conditional branch is BCC
11236 // * The compare instruction is ADDS/SUBS/WHILEXX
11237 // * One operand of the compare is an induction variable and the other is a
11238 // loop invariant value
11239 // * The induction variable is incremented/decremented by a single instruction
11240 // * Does not contain CALL or instructions which have unmodeled side effects
11241
11242 for (MachineInstr &MI : *LoopBB)
11243 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11244 // This instruction may use NZCV, which interferes with the instruction to
11245 // be inserted for loop control.
11246 return nullptr;
11247
11248 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11249 SmallVector<MachineOperand, 4> Cond;
11250 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11251 return nullptr;
11252
11253 // Infinite loops are not supported
11254 if (TBB == LoopBB && FBB == LoopBB)
11255 return nullptr;
11256
11257 // Must be conditional branch
11258 if (TBB != LoopBB && FBB == nullptr)
11259 return nullptr;
11260
11261 assert((TBB == LoopBB || FBB == LoopBB) &&
11262 "The Loop must be a single-basic-block loop");
11263
11264 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11265 const TargetRegisterInfo &TRI = getRegisterInfo();
11266
11267 if (CondBranch->getOpcode() != AArch64::Bcc)
11268 return nullptr;
11269
11270 // Normalization for createTripCountGreaterCondition()
11271 if (TBB == LoopBB)
11272 reverseBranchCondition(Cond);
11273
11274 MachineInstr *Comp = nullptr;
11275 unsigned CompCounterOprNum = 0;
11276 for (MachineInstr &MI : reverse(*LoopBB)) {
11277 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11278 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11279 // operands is a loop invariant value
11280
11281 switch (MI.getOpcode()) {
11282 case AArch64::SUBSXri:
11283 case AArch64::SUBSWri:
11284 case AArch64::ADDSXri:
11285 case AArch64::ADDSWri:
11286 Comp = &MI;
11287 CompCounterOprNum = 1;
11288 break;
11289 case AArch64::ADDSWrr:
11290 case AArch64::ADDSXrr:
11291 case AArch64::SUBSWrr:
11292 case AArch64::SUBSXrr:
11293 Comp = &MI;
11294 break;
11295 default:
11296 if (isWhileOpcode(MI.getOpcode())) {
11297 Comp = &MI;
11298 break;
11299 }
11300 return nullptr;
11301 }
11302
11303 if (CompCounterOprNum == 0) {
11304 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11305 CompCounterOprNum = 2;
11306 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11307 CompCounterOprNum = 1;
11308 else
11309 return nullptr;
11310 }
11311 break;
11312 }
11313 }
11314 if (!Comp)
11315 return nullptr;
11316
11317 MachineInstr *Update = nullptr;
11318 Register Init;
11319 bool IsUpdatePriorComp;
11320 unsigned UpdateCounterOprNum;
11321 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11322 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11323 return nullptr;
11324
11325 return std::make_unique<AArch64PipelinerLoopInfo>(
11326 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11327 Init, IsUpdatePriorComp, Cond);
11328}
11329
11330/// verifyInstruction - Perform target specific instruction verification.
11331bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11332 StringRef &ErrInfo) const {
11333 // Verify that immediate offsets on load/store instructions are within range.
11334 // Stack objects with an FI operand are excluded as they can be fixed up
11335 // during PEI.
11336 TypeSize Scale(0U, false), Width(0U, false);
11337 int64_t MinOffset, MaxOffset;
11338 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11339 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11340 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11341 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11342 if (Imm < MinOffset || Imm > MaxOffset) {
11343 ErrInfo = "Unexpected immediate on load/store instruction";
11344 return false;
11345 }
11346 }
11347 }
11348
11349 const MCInstrDesc &MCID = MI.getDesc();
11350 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11351 const MachineOperand &MO = MI.getOperand(Op);
11352 switch (MCID.operands()[Op].OperandType) {
11353 case AArch64::OPERAND_IMPLICIT_IMM_0:
11354 if (!MO.isImm() || MO.getImm() != 0) {
11355 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11356 return false;
11357 }
11358 break;
11359 case AArch64::OPERAND_SHIFT_MSL:
11360 if (!MO.isImm() ||
11361 AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11362 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11363 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11364 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11365 return false;
11366 }
11367 break;
11368 default:
11369 break;
11370 }
11371 }
11372 return true;
11373}
11374
11375#define GET_INSTRINFO_HELPERS
11376#define GET_INSTRMAP_INFO
11377#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:146
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
static constexpr unsigned NoRegister
Definition MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
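A short sketch of how a frame object is usually inspected through these MachineFrameInfo accessors; MF and the frame index FI are assumed to be in scope.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isFixedObjectIndex(FI)) {
  int64_t Size = MFI.getObjectSize(FI);    // object size in bytes
  Align A      = MFI.getObjectAlign(FI);   // object alignment
  int64_t Off  = MFI.getObjectOffset(FI);  // offset from the incoming SP
  (void)Size; (void)A; (void)Off;
}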
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
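A minimal sketch of the fluent MachineInstrBuilder interface, assuming MBB, MBBI, DL and TII are in scope: the chained calls append one operand each to the freshly created instruction.
// Sketch: build "add x0, x1, #4" as an ADDXri MachineInstr.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X0)
    .addReg(AArch64::X1)
    .addImm(4)    // 12-bit immediate
    .addImm(0);   // shift amount (0 or 12)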
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated with IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
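A small sketch of inspecting and synthesizing operands with this interface; MI is assumed to be a MachineInstr reference.
const MachineOperand &MO = MI.getOperand(0);
bool TouchesSP = MO.isReg() && MO.getReg() == AArch64::SP;  // operand 0 is SP?
MachineOperand ImmOp = MachineOperand::CreateImm(42);       // detached immediate operand
(void)TouchesSP; (void)ImmOp;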
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents a location in source code.
Definition SMLoc.h:22
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:50
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:42
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
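A brief sketch of the StackOffset representation: a fixed byte component and a scalable (vscale-multiplied) component carried side by side.
StackOffset Off = StackOffset::get(/*Fixed=*/16, /*Scalable=*/2);
int64_t FixedBytes    = Off.getFixed();     // 16
int64_t ScalableBytes = Off.getScalable();  // 2, multiplied by vscale at runtime
(void)FixedBytes; (void)ScalableBytes;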
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:347
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
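A minimal sketch of these AArch64_AM encoding helpers; 0xFF is a valid 64-bit logical immediate, so the encode/decode pair round-trips.
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);      // "lsl #12"
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0xFFULL, 64);          // N:immr:imms form
uint64_t Dec = AArch64_AM::decodeLogicalImmediate(Enc, 64);              // == 0xFF
(void)Shifter; (void)Dec;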
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
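A hedged sketch of driving this expansion, assuming the AArch64_IMM namespace and ImmInsnModel type declared in AArch64ExpandImm.h.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(0x0000FEDC00001234ULL, 64, Insn);
// Insn now holds one entry per MOVZ/MOVN/MOVK/ORR step needed for the constant.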
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
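A short sketch of testing the status bits returned by isAArch64FrameOffsetLegal (declared above); MI is assumed to be a const MachineInstr reference in scope.
StackOffset Off = StackOffset::getFixed(32);
int Status = isAArch64FrameOffsetLegal(MI, Off);
bool Legal     = Status & AArch64FrameOffsetIsLegal;    // fits the immediate field as-is
bool CanUpdate = Status & AArch64FrameOffsetCanUpdate;  // at least part can be folded in
(void)Legal; (void)CanUpdate;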
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
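A minimal sketch of a typical prologue-style call, assuming MBB, MBBI, DL and TII are in scope: subtract 16 bytes from SP and tag the emitted instructions as frame setup.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                StackOffset::getFixed(-16), TII, MachineInstr::FrameSetup);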
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
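A small worked sketch of the MathExtras helpers listed above, as used in offset legality checks.
bool FitsSigned9 = isInt<9>(-256);          // true: signed 9-bit range is [-256, 255]
bool FitsDyn12   = isIntN(12, 4095);        // false: signed 12-bit max is 2047
int64_t Extended = SignExtend64<9>(0x1FF);  // -1: all nine low bits set
(void)FitsSigned9; (void)FitsDyn12; (void)Extended;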
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:236
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.