LLVM 23.0.0git
ARMLoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a pass that performs load / store related peephole
10/// optimizations. This pass should be run after register allocation.
11//
12//===----------------------------------------------------------------------===//
13
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "arm-ldst-opt"
73
// Pass-wide statistics (reported with -stats): counts of each kind of merge
// or rewrite performed by this pass.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
85
/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// \see mayCombineMisaligned()
///
/// Defaults to off; hidden from -help listings.
static cl::opt<bool>
AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
    cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

// Human-readable pass name, shared by getPassName() and pass registration.
#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
96
97namespace {
98
99/// Post- register allocation pass the combine load / store instructions to
100/// form ldm / stm instructions.
101struct ARMLoadStoreOpt {
102 const MachineFunction *MF;
103 const TargetInstrInfo *TII;
104 const TargetRegisterInfo *TRI;
105 const ARMSubtarget *STI;
106 const TargetLowering *TL;
107 ARMFunctionInfo *AFI;
109 RegisterClassInfo RegClassInfo;
111 bool LiveRegsValid;
112 bool RegClassInfoValid;
113 bool isThumb1, isThumb2;
114
115 bool runOnMachineFunction(MachineFunction &Fn);
116
117private:
118 /// A set of load/store MachineInstrs with same base register sorted by
119 /// offset.
120 struct MemOpQueueEntry {
122 int Offset; ///< Load/Store offset.
123 unsigned Position; ///< Position as counted from end of basic block.
124
125 MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
126 : MI(&MI), Offset(Offset), Position(Position) {}
127 };
128 using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
129
130 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
131 /// merged into a LDM/STM.
132 struct MergeCandidate {
133 /// List of instructions ordered by load/store offset.
135
136 /// Index in Instrs of the instruction being latest in the schedule.
137 unsigned LatestMIIdx;
138
139 /// Index in Instrs of the instruction being earliest in the schedule.
140 unsigned EarliestMIIdx;
141
142 /// Index into the basic block where the merged instruction will be
143 /// inserted. (See MemOpQueueEntry.Position)
144 unsigned InsertPos;
145
146 /// Whether the instructions can be merged into a ldm/stm instruction.
147 bool CanMergeToLSMulti;
148
149 /// Whether the instructions can be merged into a ldrd/strd instruction.
150 bool CanMergeToLSDouble;
151 };
154 SmallVector<MachineInstr *, 4> MergeBaseCandidates;
155
156 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
158 unsigned findFreeReg(const TargetRegisterClass &RegClass);
159 void UpdateBaseRegUses(MachineBasicBlock &MBB,
161 unsigned Base, unsigned WordOffset,
162 ARMCC::CondCodes Pred, unsigned PredReg);
163 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
164 MachineBasicBlock::iterator InsertBefore,
165 int Offset, unsigned Base, bool BaseKill,
166 unsigned Opcode, ARMCC::CondCodes Pred,
167 unsigned PredReg, const DebugLoc &DL,
168 ArrayRef<std::pair<unsigned, bool>> Regs,
170 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
171 MachineBasicBlock::iterator InsertBefore,
172 int Offset, unsigned Base, bool BaseKill,
173 unsigned Opcode, ARMCC::CondCodes Pred,
174 unsigned PredReg, const DebugLoc &DL,
175 ArrayRef<std::pair<unsigned, bool>> Regs,
176 ArrayRef<MachineInstr *> Instrs) const;
177 void FormCandidates(const MemOpQueue &MemOps);
178 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
179 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
181 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
182 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
183 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
184 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
185 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
186 bool CombineMovBx(MachineBasicBlock &MBB);
187};
188
189struct ARMLoadStoreOptLegacy : public MachineFunctionPass {
190 static char ID;
191
192 ARMLoadStoreOptLegacy() : MachineFunctionPass(ID) {}
193
194 bool runOnMachineFunction(MachineFunction &Fn) override;
195
196 MachineFunctionProperties getRequiredProperties() const override {
197 return MachineFunctionProperties().setNoVRegs();
198 }
199
200 StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
201};
202
203char ARMLoadStoreOptLegacy::ID = 0;
204
205} // end anonymous namespace
206
// Register the legacy pass with the pass registry under -arm-ldst-opt.
INITIALIZE_PASS(ARMLoadStoreOptLegacy, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME,
                false, false)
209
210static bool definesCPSR(const MachineInstr &MI) {
211 for (const auto &MO : MI.operands()) {
212 if (!MO.isReg())
213 continue;
214 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
215 // If the instruction has live CPSR def, then it's not safe to fold it
216 // into load / store.
217 return true;
218 }
219
220 return false;
221}
222
224 unsigned Opcode = MI.getOpcode();
225 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
226 unsigned NumOperands = MI.getDesc().getNumOperands();
227 unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
228
229 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
230 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
231 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
232 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
233 return OffField;
234
235 // Thumb1 immediate offsets are scaled by 4
236 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
237 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
238 return OffField * 4;
239
240 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
241 : ARM_AM::getAM5Offset(OffField) * 4;
242 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
243 : ARM_AM::getAM5Op(OffField);
244
245 if (Op == ARM_AM::sub)
246 return -Offset;
247
248 return Offset;
249}
250
252 return MI.getOperand(1);
253}
254
256 return MI.getOperand(0);
257}
258
260 switch (Opcode) {
261 default: llvm_unreachable("Unhandled opcode!");
262 case ARM::LDRi12:
263 ++NumLDMGened;
264 switch (Mode) {
265 default: llvm_unreachable("Unhandled submode!");
266 case ARM_AM::ia: return ARM::LDMIA;
267 case ARM_AM::da: return ARM::LDMDA;
268 case ARM_AM::db: return ARM::LDMDB;
269 case ARM_AM::ib: return ARM::LDMIB;
270 }
271 case ARM::STRi12:
272 ++NumSTMGened;
273 switch (Mode) {
274 default: llvm_unreachable("Unhandled submode!");
275 case ARM_AM::ia: return ARM::STMIA;
276 case ARM_AM::da: return ARM::STMDA;
277 case ARM_AM::db: return ARM::STMDB;
278 case ARM_AM::ib: return ARM::STMIB;
279 }
280 case ARM::tLDRi:
281 case ARM::tLDRspi:
282 // tLDMIA is writeback-only - unless the base register is in the input
283 // reglist.
284 ++NumLDMGened;
285 switch (Mode) {
286 default: llvm_unreachable("Unhandled submode!");
287 case ARM_AM::ia: return ARM::tLDMIA;
288 }
289 case ARM::tSTRi:
290 case ARM::tSTRspi:
291 // There is no non-writeback tSTMIA either.
292 ++NumSTMGened;
293 switch (Mode) {
294 default: llvm_unreachable("Unhandled submode!");
295 case ARM_AM::ia: return ARM::tSTMIA_UPD;
296 }
297 case ARM::t2LDRi8:
298 case ARM::t2LDRi12:
299 ++NumLDMGened;
300 switch (Mode) {
301 default: llvm_unreachable("Unhandled submode!");
302 case ARM_AM::ia: return ARM::t2LDMIA;
303 case ARM_AM::db: return ARM::t2LDMDB;
304 }
305 case ARM::t2STRi8:
306 case ARM::t2STRi12:
307 ++NumSTMGened;
308 switch (Mode) {
309 default: llvm_unreachable("Unhandled submode!");
310 case ARM_AM::ia: return ARM::t2STMIA;
311 case ARM_AM::db: return ARM::t2STMDB;
312 }
313 case ARM::VLDRS:
314 ++NumVLDMGened;
315 switch (Mode) {
316 default: llvm_unreachable("Unhandled submode!");
317 case ARM_AM::ia: return ARM::VLDMSIA;
318 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
319 }
320 case ARM::VSTRS:
321 ++NumVSTMGened;
322 switch (Mode) {
323 default: llvm_unreachable("Unhandled submode!");
324 case ARM_AM::ia: return ARM::VSTMSIA;
325 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
326 }
327 case ARM::VLDRD:
328 ++NumVLDMGened;
329 switch (Mode) {
330 default: llvm_unreachable("Unhandled submode!");
331 case ARM_AM::ia: return ARM::VLDMDIA;
332 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
333 }
334 case ARM::VSTRD:
335 ++NumVSTMGened;
336 switch (Mode) {
337 default: llvm_unreachable("Unhandled submode!");
338 case ARM_AM::ia: return ARM::VSTMDIA;
339 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
340 }
341 }
342}
343
345 switch (Opcode) {
346 default: llvm_unreachable("Unhandled opcode!");
347 case ARM::LDMIA_RET:
348 case ARM::LDMIA:
349 case ARM::LDMIA_UPD:
350 case ARM::STMIA:
351 case ARM::STMIA_UPD:
352 case ARM::tLDMIA:
353 case ARM::tLDMIA_UPD:
354 case ARM::tSTMIA_UPD:
355 case ARM::t2LDMIA_RET:
356 case ARM::t2LDMIA:
357 case ARM::t2LDMIA_UPD:
358 case ARM::t2STMIA:
359 case ARM::t2STMIA_UPD:
360 case ARM::VLDMSIA:
361 case ARM::VLDMSIA_UPD:
362 case ARM::VSTMSIA:
363 case ARM::VSTMSIA_UPD:
364 case ARM::VLDMDIA:
365 case ARM::VLDMDIA_UPD:
366 case ARM::VSTMDIA:
367 case ARM::VSTMDIA_UPD:
368 return ARM_AM::ia;
369
370 case ARM::LDMDA:
371 case ARM::LDMDA_UPD:
372 case ARM::STMDA:
373 case ARM::STMDA_UPD:
374 return ARM_AM::da;
375
376 case ARM::LDMDB:
377 case ARM::LDMDB_UPD:
378 case ARM::STMDB:
379 case ARM::STMDB_UPD:
380 case ARM::t2LDMDB:
381 case ARM::t2LDMDB_UPD:
382 case ARM::t2STMDB:
383 case ARM::t2STMDB_UPD:
384 case ARM::VLDMSDB_UPD:
385 case ARM::VSTMSDB_UPD:
386 case ARM::VLDMDDB_UPD:
387 case ARM::VSTMDDB_UPD:
388 return ARM_AM::db;
389
390 case ARM::LDMIB:
391 case ARM::LDMIB_UPD:
392 case ARM::STMIB:
393 case ARM::STMIB_UPD:
394 return ARM_AM::ib;
395 }
396}
397
398static bool isT1i32Load(unsigned Opc) {
399 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
400}
401
402static bool isT2i32Load(unsigned Opc) {
403 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
404}
405
406static bool isi32Load(unsigned Opc) {
407 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
408}
409
410static bool isT1i32Store(unsigned Opc) {
411 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
412}
413
414static bool isT2i32Store(unsigned Opc) {
415 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
416}
417
418static bool isi32Store(unsigned Opc) {
419 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
420}
421
422static bool isLoadSingle(unsigned Opc) {
423 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
424}
425
// Scale factor that converts a word (4-byte) count into immediate units of
// the given Thumb1 load/store opcode; see the use in UpdateBaseRegUses:
//   imm -= WordOffset * getImmScale(Opc).
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    // Word access: the immediate already counts words.
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    // Halfword access: two immediate steps per word.
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    // Byte access: four immediate steps per word.
    return 4;
  }
}
442
444 switch (MI->getOpcode()) {
445 default: return 0;
446 case ARM::LDRi12:
447 case ARM::STRi12:
448 case ARM::tLDRi:
449 case ARM::tSTRi:
450 case ARM::tLDRspi:
451 case ARM::tSTRspi:
452 case ARM::t2LDRi8:
453 case ARM::t2LDRi12:
454 case ARM::t2STRi8:
455 case ARM::t2STRi12:
456 case ARM::VLDRS:
457 case ARM::VSTRS:
458 return 4;
459 case ARM::VLDRD:
460 case ARM::VSTRD:
461 return 8;
462 case ARM::LDMIA:
463 case ARM::LDMDA:
464 case ARM::LDMDB:
465 case ARM::LDMIB:
466 case ARM::STMIA:
467 case ARM::STMDA:
468 case ARM::STMDB:
469 case ARM::STMIB:
470 case ARM::tLDMIA:
471 case ARM::tLDMIA_UPD:
472 case ARM::tSTMIA_UPD:
473 case ARM::t2LDMIA:
474 case ARM::t2LDMDB:
475 case ARM::t2STMIA:
476 case ARM::t2STMDB:
477 case ARM::VLDMSIA:
478 case ARM::VSTMSIA:
479 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
480 case ARM::VLDMDIA:
481 case ARM::VSTMDIA:
482 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
483 }
484}
485
486/// Update future uses of the base register with the offset introduced
487/// due to writeback. This function only works on Thumb1.
488void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
490 const DebugLoc &DL, unsigned Base,
491 unsigned WordOffset,
492 ARMCC::CondCodes Pred,
493 unsigned PredReg) {
494 assert(isThumb1 && "Can only update base register uses for Thumb1!");
495 // Start updating any instructions with immediate offsets. Insert a SUB before
496 // the first non-updateable instruction (if any).
497 for (; MBBI != MBB.end(); ++MBBI) {
498 bool InsertSub = false;
499 unsigned Opc = MBBI->getOpcode();
500
501 if (MBBI->readsRegister(Base, /*TRI=*/nullptr)) {
502 int Offset;
503 bool IsLoad =
504 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
505 bool IsStore =
506 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
507
508 if (IsLoad || IsStore) {
509 // Loads and stores with immediate offsets can be updated, but only if
510 // the new offset isn't negative.
511 // The MachineOperand containing the offset immediate is the last one
512 // before predicates.
513 MachineOperand &MO =
514 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
515 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
516 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
517
518 // If storing the base register, it needs to be reset first.
519 Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
520
521 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
522 MO.setImm(Offset);
523 else
524 InsertSub = true;
525 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
526 !definesCPSR(*MBBI)) {
527 // SUBS/ADDS using this register, with a dead def of the CPSR.
528 // Merge it with the update; if the merged offset is too large,
529 // insert a new sub instead.
530 MachineOperand &MO =
531 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
532 Offset = (Opc == ARM::tSUBi8) ?
533 MO.getImm() + WordOffset * 4 :
534 MO.getImm() - WordOffset * 4 ;
535 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
536 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
537 // Offset == 0.
538 MO.setImm(Offset);
539 // The base register has now been reset, so exit early.
540 return;
541 } else {
542 InsertSub = true;
543 }
544 } else {
545 // Can't update the instruction.
546 InsertSub = true;
547 }
548 } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
549 // Since SUBS sets the condition flags, we can't place the base reset
550 // after an instruction that has a live CPSR def.
551 // The base register might also contain an argument for a function call.
552 InsertSub = true;
553 }
554
555 if (InsertSub) {
556 // An instruction above couldn't be updated, so insert a sub.
557 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
558 .add(t1CondCodeOp(true))
559 .addReg(Base)
560 .addImm(WordOffset * 4)
561 .addImm(Pred)
562 .addReg(PredReg);
563 return;
564 }
565
566 if (MBBI->killsRegister(Base, /*TRI=*/nullptr) ||
567 MBBI->definesRegister(Base, /*TRI=*/nullptr))
568 // Register got killed. Stop updating.
569 return;
570 }
571
572 // End of block was reached.
573 if (!MBB.succ_empty()) {
574 // FIXME: Because of a bug, live registers are sometimes missing from
575 // the successor blocks' live-in sets. This means we can't trust that
576 // information and *always* have to reset at the end of a block.
577 // See PR21029.
578 if (MBBI != MBB.end()) --MBBI;
579 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
580 .add(t1CondCodeOp(true))
581 .addReg(Base)
582 .addImm(WordOffset * 4)
583 .addImm(Pred)
584 .addReg(PredReg);
585 }
586}
587
588/// Return the first register of class \p RegClass that is not in \p Regs.
589unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
590 if (!RegClassInfoValid) {
591 RegClassInfo.runOnMachineFunction(*MF);
592 RegClassInfoValid = true;
593 }
594
595 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
596 if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))
597 return Reg;
598 return 0;
599}
600
601/// Compute live registers just before instruction \p Before (in normal schedule
602/// direction). Computes backwards so multiple queries in the same block must
603/// come in reverse order.
604void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
606 // Initialize if we never queried in this block.
607 if (!LiveRegsValid) {
608 LiveRegs.init(*TRI);
609 LiveRegs.addLiveOuts(MBB);
610 LiveRegPos = MBB.end();
611 LiveRegsValid = true;
612 }
613 // Move backward just before the "Before" position.
614 while (LiveRegPos != Before) {
615 --LiveRegPos;
616 if (!LiveRegPos->isDebugInstr())
617 LiveRegs.stepBackward(*LiveRegPos);
618 }
619}
620
621static bool ContainsReg(ArrayRef<std::pair<unsigned, bool>> Regs,
622 unsigned Reg) {
623 for (const std::pair<unsigned, bool> &R : Regs)
624 if (R.first == Reg)
625 return true;
626 return false;
627}
628
629/// Create and insert a LDM or STM with Base as base register and registers in
630/// Regs as the register operands that would be loaded / stored. It returns
631/// true if the transformation is done.
632MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
633 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
634 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
635 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
636 ArrayRef<std::pair<unsigned, bool>> Regs,
638 unsigned NumRegs = Regs.size();
639 assert(NumRegs > 1);
640
641 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
642 // Compute liveness information for that register to make the decision.
643 bool SafeToClobberCPSR = !isThumb1 ||
644 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
646
647 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
648
649 // Exception: If the base register is in the input reglist, Thumb1 LDM is
650 // non-writeback.
651 // It's also not possible to merge an STR of the base register in Thumb1.
652 if (isThumb1 && ContainsReg(Regs, Base)) {
653 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
654 if (Opcode == ARM::tLDRi)
655 Writeback = false;
656 else if (Opcode == ARM::tSTRi)
657 return nullptr;
658 }
659
661 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
662 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
663 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
664
665 if (Offset == 4 && haveIBAndDA) {
667 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
669 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
670 // VLDM/VSTM do not support DB mode without also updating the base reg.
672 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
673 // Check if this is a supported opcode before inserting instructions to
674 // calculate a new base register.
675 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
676
677 // If starting offset isn't zero, insert a MI to materialize a new base.
678 // But only do so if it is cost effective, i.e. merging more than two
679 // loads / stores.
680 if (NumRegs <= 2)
681 return nullptr;
682
683 // On Thumb1, it's not worth materializing a new base register without
684 // clobbering the CPSR (i.e. not using ADDS/SUBS).
685 if (!SafeToClobberCPSR)
686 return nullptr;
687
688 unsigned NewBase;
689 if (isi32Load(Opcode)) {
690 // If it is a load, then just use one of the destination registers
691 // as the new base. Will no longer be writeback in Thumb1.
692 NewBase = Regs[NumRegs-1].first;
693 Writeback = false;
694 } else {
695 // Find a free register that we can use as scratch register.
696 moveLiveRegsBefore(MBB, InsertBefore);
697 // The merged instruction does not exist yet but will use several Regs if
698 // it is a Store.
699 if (!isLoadSingle(Opcode))
700 for (const std::pair<unsigned, bool> &R : Regs)
701 LiveRegs.addReg(R.first);
702
703 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
704 if (NewBase == 0)
705 return nullptr;
706 }
707
708 int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
709 : ARM::t2ADDri)
710 : (isThumb1 && Base == ARM::SP)
711 ? ARM::tADDrSPi
712 : (isThumb1 && Offset < 8)
713 ? ARM::tADDi3
714 : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
715
716 if (Offset < 0) {
717 // FIXME: There are no Thumb1 load/store instructions with negative
718 // offsets. So the Base != ARM::SP might be unnecessary.
719 Offset = -Offset;
720 BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
721 : ARM::t2SUBri)
722 : (isThumb1 && Offset < 8 && Base != ARM::SP)
723 ? ARM::tSUBi3
724 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
725 }
726
727 if (!TL->isLegalAddImmediate(Offset))
728 // FIXME: Try add with register operand?
729 return nullptr; // Probably not worth it then.
730
731 // We can only append a kill flag to the add/sub input if the value is not
732 // used in the register list of the stm as well.
733 bool KillOldBase = BaseKill &&
734 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
735
736 if (isThumb1) {
737 // Thumb1: depending on immediate size, use either
738 // ADDS NewBase, Base, #imm3
739 // or
740 // MOV NewBase, Base
741 // ADDS NewBase, #imm8.
742 if (Base != NewBase &&
743 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
744 // Need to insert a MOV to the new base first.
745 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
746 !STI->hasV6Ops()) {
747 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
748 if (Pred != ARMCC::AL)
749 return nullptr;
750 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
751 .addReg(Base, getKillRegState(KillOldBase));
752 } else
753 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
754 .addReg(Base, getKillRegState(KillOldBase))
755 .add(predOps(Pred, PredReg));
756
757 // The following ADDS/SUBS becomes an update.
758 Base = NewBase;
759 KillOldBase = true;
760 }
761 if (BaseOpc == ARM::tADDrSPi) {
762 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
763 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
764 .addReg(Base, getKillRegState(KillOldBase))
765 .addImm(Offset / 4)
766 .add(predOps(Pred, PredReg));
767 } else
768 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
769 .add(t1CondCodeOp(true))
770 .addReg(Base, getKillRegState(KillOldBase))
771 .addImm(Offset)
772 .add(predOps(Pred, PredReg));
773 } else {
774 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
775 .addReg(Base, getKillRegState(KillOldBase))
776 .addImm(Offset)
777 .add(predOps(Pred, PredReg))
778 .add(condCodeOp());
779 }
780 Base = NewBase;
781 BaseKill = true; // New base is always killed straight away.
782 }
783
784 bool isDef = isLoadSingle(Opcode);
785
786 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
787 // base register writeback.
788 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
789 if (!Opcode)
790 return nullptr;
791
792 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
793 // - There is no writeback (LDM of base register),
794 // - the base register is killed by the merged instruction,
795 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
796 // to reset the base register.
797 // Otherwise, don't merge.
798 // It's safe to return here since the code to materialize a new base register
799 // above is also conditional on SafeToClobberCPSR.
800 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
801 return nullptr;
802
803 MachineInstrBuilder MIB;
804
805 if (Writeback) {
806 assert(isThumb1 && "expected Writeback only inThumb1");
807 if (Opcode == ARM::tLDMIA) {
808 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
809 // Update tLDMIA with writeback if necessary.
810 Opcode = ARM::tLDMIA_UPD;
811 }
812
813 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
814
815 // Thumb1: we might need to set base writeback when building the MI.
816 MIB.addReg(Base, getDefRegState(true))
817 .addReg(Base, getKillRegState(BaseKill));
818
819 // The base isn't dead after a merged instruction with writeback.
820 // Insert a sub instruction after the newly formed instruction to reset.
821 if (!BaseKill)
822 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
823 } else {
824 // No writeback, simply build the MachineInstr.
825 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
826 MIB.addReg(Base, getKillRegState(BaseKill));
827 }
828
829 MIB.addImm(Pred).addReg(PredReg);
830
831 for (const std::pair<unsigned, bool> &R : Regs)
832 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
833
834 MIB.cloneMergedMemRefs(Instrs);
835
836 return MIB.getInstr();
837}
838
839MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
840 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
841 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
842 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
843 ArrayRef<std::pair<unsigned, bool>> Regs,
844 ArrayRef<MachineInstr*> Instrs) const {
845 bool IsLoad = isi32Load(Opcode);
846 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
847 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
848
849 assert(Regs.size() == 2);
850 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
851 TII->get(LoadStoreOpcode));
852 if (IsLoad) {
853 MIB.addReg(Regs[0].first, RegState::Define)
854 .addReg(Regs[1].first, RegState::Define);
855 } else {
856 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
857 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
858 }
859 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
860 MIB.cloneMergedMemRefs(Instrs);
861 return MIB.getInstr();
862}
863
864/// Call MergeOps and update MemOps and merges accordingly on success.
865MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
866 const MachineInstr *First = Cand.Instrs.front();
867 unsigned Opcode = First->getOpcode();
868 bool IsLoad = isLoadSingle(Opcode);
870 SmallVector<unsigned, 4> ImpDefs;
871 DenseSet<unsigned> KilledRegs;
872 DenseSet<unsigned> UsedRegs;
873 // Determine list of registers and list of implicit super-register defs.
874 for (const MachineInstr *MI : Cand.Instrs) {
875 const MachineOperand &MO = getLoadStoreRegOp(*MI);
876 Register Reg = MO.getReg();
877 bool IsKill = MO.isKill();
878 if (IsKill)
879 KilledRegs.insert(Reg);
880 Regs.push_back(std::make_pair(Reg, IsKill));
881 UsedRegs.insert(Reg);
882
883 if (IsLoad) {
884 // Collect any implicit defs of super-registers, after merging we can't
885 // be sure anymore that we properly preserved these live ranges and must
886 // removed these implicit operands.
887 for (const MachineOperand &MO : MI->implicit_operands()) {
888 if (!MO.isReg() || !MO.isDef() || MO.isDead())
889 continue;
890 assert(MO.isImplicit());
891 Register DefReg = MO.getReg();
892
893 if (is_contained(ImpDefs, DefReg))
894 continue;
895 // We can ignore cases where the super-reg is read and written.
896 if (MI->readsRegister(DefReg, /*TRI=*/nullptr))
897 continue;
898 ImpDefs.push_back(DefReg);
899 }
900 }
901 }
902
903 // Attempt the merge.
905
906 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
907 iterator InsertBefore = std::next(iterator(LatestMI));
908 MachineBasicBlock &MBB = *LatestMI->getParent();
909 unsigned Offset = getMemoryOpOffset(*First);
911 bool BaseKill = LatestMI->killsRegister(Base, /*TRI=*/nullptr);
912 Register PredReg;
913 ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
914 DebugLoc DL = First->getDebugLoc();
915 MachineInstr *Merged = nullptr;
916 if (Cand.CanMergeToLSDouble)
917 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
918 Opcode, Pred, PredReg, DL, Regs,
919 Cand.Instrs);
920 if (!Merged && Cand.CanMergeToLSMulti)
921 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
922 Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
923 if (!Merged)
924 return nullptr;
925
926 // Determine earliest instruction that will get removed. We then keep an
927 // iterator just above it so the following erases don't invalidated it.
928 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
929 bool EarliestAtBegin = false;
930 if (EarliestI == MBB.begin()) {
931 EarliestAtBegin = true;
932 } else {
933 EarliestI = std::prev(EarliestI);
934 }
935
936 // Remove instructions which have been merged.
937 for (MachineInstr *MI : Cand.Instrs)
938 MBB.erase(MI);
939
940 // Determine range between the earliest removed instruction and the new one.
941 if (EarliestAtBegin)
942 EarliestI = MBB.begin();
943 else
944 EarliestI = std::next(EarliestI);
945 auto FixupRange = make_range(EarliestI, iterator(Merged));
946
947 if (isLoadSingle(Opcode)) {
948 // If the previous loads defined a super-reg, then we have to mark earlier
949 // operands undef; Replicate the super-reg def on the merged instruction.
950 for (MachineInstr &MI : FixupRange) {
951 for (unsigned &ImpDefReg : ImpDefs) {
952 for (MachineOperand &MO : MI.implicit_operands()) {
953 if (!MO.isReg() || MO.getReg() != ImpDefReg)
954 continue;
955 if (MO.readsReg())
956 MO.setIsUndef();
957 else if (MO.isDef())
958 ImpDefReg = 0;
959 }
960 }
961 }
962
963 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
964 for (unsigned ImpDef : ImpDefs)
965 MIB.addReg(ImpDef, RegState::ImplicitDefine);
966 } else {
967 // Remove kill flags: We are possibly storing the values later now.
968 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
969 for (MachineInstr &MI : FixupRange) {
970 for (MachineOperand &MO : MI.uses()) {
971 if (!MO.isReg() || !MO.isKill())
972 continue;
973 if (UsedRegs.count(MO.getReg()))
974 MO.setIsKill(false);
975 }
976 }
977 assert(ImpDefs.empty());
978 }
979
980 return Merged;
981}
982
/// Return true if \p Offset (in bytes) is encodable in a t2LDRDi8/t2STRDi8:
/// a multiple of 4 with magnitude below 1024.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
989
990/// Return true for loads/stores that can be combined to a double/multi
991/// operation without increasing the requirements for alignment.
993 const MachineInstr &MI) {
994 // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
995 // difference.
996 unsigned Opcode = MI.getOpcode();
997 if (!isi32Load(Opcode) && !isi32Store(Opcode))
998 return true;
999
1000 // Stack pointer alignment is out of the programmers control so we can trust
1001 // SP-relative loads/stores.
1002 if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
1004 return true;
1005 return false;
1006}
1007
1008/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
1009void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
1010 const MachineInstr *FirstMI = MemOps[0].MI;
1011 unsigned Opcode = FirstMI->getOpcode();
1012 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
1013 unsigned Size = getLSMultipleTransferSize(FirstMI);
1014
1015 unsigned SIndex = 0;
1016 unsigned EIndex = MemOps.size();
1017 do {
1018 // Look at the first instruction.
1019 const MachineInstr *MI = MemOps[SIndex].MI;
1020 int Offset = MemOps[SIndex].Offset;
1021 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
1022 Register PReg = PMO.getReg();
1023 unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
1024 : TRI->getEncodingValue(PReg);
1025 unsigned Latest = SIndex;
1026 unsigned Earliest = SIndex;
1027 unsigned Count = 1;
1028 bool CanMergeToLSDouble =
1029 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
1030 // ARM errata 602117: LDRD with base in list may result in incorrect base
1031 // register when interrupted or faulted.
1032 if (STI->isCortexM3() && isi32Load(Opcode) &&
1033 PReg == getLoadStoreBaseOp(*MI).getReg())
1034 CanMergeToLSDouble = false;
1035
1036 bool CanMergeToLSMulti = true;
1037 // On swift vldm/vstm starting with an odd register number as that needs
1038 // more uops than single vldrs.
1039 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1040 CanMergeToLSMulti = false;
1041
1042 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
1043 // deprecated; LDM to PC is fine but cannot happen here.
1044 if (PReg == ARM::SP || PReg == ARM::PC)
1045 CanMergeToLSMulti = CanMergeToLSDouble = false;
1046
1047 // Should we be conservative?
1049 CanMergeToLSMulti = CanMergeToLSDouble = false;
1050
1051 // vldm / vstm limit are 32 for S variants, 16 for D variants.
1052 unsigned Limit;
1053 switch (Opcode) {
1054 default:
1055 Limit = UINT_MAX;
1056 break;
1057 case ARM::VLDRD:
1058 case ARM::VSTRD:
1059 Limit = 16;
1060 break;
1061 }
1062
1063 // Merge following instructions where possible.
1064 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
1065 int NewOffset = MemOps[I].Offset;
1066 if (NewOffset != Offset + (int)Size)
1067 break;
1068 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
1069 Register Reg = MO.getReg();
1070 if (Reg == ARM::SP || Reg == ARM::PC)
1071 break;
1072 if (Count == Limit)
1073 break;
1074
1075 // See if the current load/store may be part of a multi load/store.
1076 unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
1077 : TRI->getEncodingValue(Reg);
1078 bool PartOfLSMulti = CanMergeToLSMulti;
1079 if (PartOfLSMulti) {
1080 // Register numbers must be in ascending order.
1081 if (RegNum <= PRegNum)
1082 PartOfLSMulti = false;
1083 // For VFP / NEON load/store multiples, the registers must be
1084 // consecutive and within the limit on the number of registers per
1085 // instruction.
1086 else if (!isNotVFP && RegNum != PRegNum+1)
1087 PartOfLSMulti = false;
1088 }
1089 // See if the current load/store may be part of a double load/store.
1090 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1091
1092 if (!PartOfLSMulti && !PartOfLSDouble)
1093 break;
1094 CanMergeToLSMulti &= PartOfLSMulti;
1095 CanMergeToLSDouble &= PartOfLSDouble;
1096 // Track MemOp with latest and earliest position (Positions are
1097 // counted in reverse).
1098 unsigned Position = MemOps[I].Position;
1099 if (Position < MemOps[Latest].Position)
1100 Latest = I;
1101 else if (Position > MemOps[Earliest].Position)
1102 Earliest = I;
1103 // Prepare for next MemOp.
1104 Offset += Size;
1105 PRegNum = RegNum;
1106 }
1107
1108 // Form a candidate from the Ops collected so far.
1109 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1110 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1111 Candidate->Instrs.push_back(MemOps[C].MI);
1112 Candidate->LatestMIIdx = Latest - SIndex;
1113 Candidate->EarliestMIIdx = Earliest - SIndex;
1114 Candidate->InsertPos = MemOps[Latest].Position;
1115 if (Count == 1)
1116 CanMergeToLSMulti = CanMergeToLSDouble = false;
1117 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1118 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1119 Candidates.push_back(Candidate);
1120 // Continue after the chain.
1121 SIndex += Count;
1122 } while (SIndex < EIndex);
1123}
1124
1125static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1127 switch (Opc) {
1128 default: llvm_unreachable("Unhandled opcode!");
1129 case ARM::LDMIA:
1130 case ARM::LDMDA:
1131 case ARM::LDMDB:
1132 case ARM::LDMIB:
1133 switch (Mode) {
1134 default: llvm_unreachable("Unhandled submode!");
1135 case ARM_AM::ia: return ARM::LDMIA_UPD;
1136 case ARM_AM::ib: return ARM::LDMIB_UPD;
1137 case ARM_AM::da: return ARM::LDMDA_UPD;
1138 case ARM_AM::db: return ARM::LDMDB_UPD;
1139 }
1140 case ARM::STMIA:
1141 case ARM::STMDA:
1142 case ARM::STMDB:
1143 case ARM::STMIB:
1144 switch (Mode) {
1145 default: llvm_unreachable("Unhandled submode!");
1146 case ARM_AM::ia: return ARM::STMIA_UPD;
1147 case ARM_AM::ib: return ARM::STMIB_UPD;
1148 case ARM_AM::da: return ARM::STMDA_UPD;
1149 case ARM_AM::db: return ARM::STMDB_UPD;
1150 }
1151 case ARM::t2LDMIA:
1152 case ARM::t2LDMDB:
1153 switch (Mode) {
1154 default: llvm_unreachable("Unhandled submode!");
1155 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1156 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1157 }
1158 case ARM::t2STMIA:
1159 case ARM::t2STMDB:
1160 switch (Mode) {
1161 default: llvm_unreachable("Unhandled submode!");
1162 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1163 case ARM_AM::db: return ARM::t2STMDB_UPD;
1164 }
1165 case ARM::VLDMSIA:
1166 switch (Mode) {
1167 default: llvm_unreachable("Unhandled submode!");
1168 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1169 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1170 }
1171 case ARM::VLDMDIA:
1172 switch (Mode) {
1173 default: llvm_unreachable("Unhandled submode!");
1174 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1175 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1176 }
1177 case ARM::VSTMSIA:
1178 switch (Mode) {
1179 default: llvm_unreachable("Unhandled submode!");
1180 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1181 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1182 }
1183 case ARM::VSTMDIA:
1184 switch (Mode) {
1185 default: llvm_unreachable("Unhandled submode!");
1186 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1187 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1188 }
1189 }
1190}
1191
1192/// Check if the given instruction increments or decrements a register and
1193/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1194/// generated by the instruction are possibly read as well.
1196 ARMCC::CondCodes Pred, Register PredReg) {
1197 bool CheckCPSRDef;
1198 int Scale;
1199 switch (MI.getOpcode()) {
1200 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1201 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1202 case ARM::t2SUBri:
1203 case ARM::t2SUBspImm:
1204 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1205 case ARM::t2ADDri:
1206 case ARM::t2ADDspImm:
1207 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1208 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1209 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1210 default: return 0;
1211 }
1212
1213 Register MIPredReg;
1214 if (MI.getOperand(0).getReg() != Reg ||
1215 MI.getOperand(1).getReg() != Reg ||
1216 getInstrPredicate(MI, MIPredReg) != Pred ||
1217 MIPredReg != PredReg)
1218 return 0;
1219
1220 if (CheckCPSRDef && definesCPSR(MI))
1221 return 0;
1222 return MI.getOperand(2).getImm() * Scale;
1223}
1224
1225/// Searches for an increment or decrement of \p Reg before \p MBBI.
1228 ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
1229 Offset = 0;
1230 MachineBasicBlock &MBB = *MBBI->getParent();
1231 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1232 MachineBasicBlock::iterator EndMBBI = MBB.end();
1233 if (MBBI == BeginMBBI)
1234 return EndMBBI;
1235
1236 // Skip debug values.
1237 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1238 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1239 --PrevMBBI;
1240
1241 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1242 return Offset == 0 ? EndMBBI : PrevMBBI;
1243}
1244
1245/// Searches for a increment or decrement of \p Reg after \p MBBI.
1248 ARMCC::CondCodes Pred, Register PredReg, int &Offset,
1249 const TargetRegisterInfo *TRI) {
1250 Offset = 0;
1251 MachineBasicBlock &MBB = *MBBI->getParent();
1252 MachineBasicBlock::iterator EndMBBI = MBB.end();
1253 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1254 while (NextMBBI != EndMBBI) {
1255 // Skip debug values.
1256 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1257 ++NextMBBI;
1258 if (NextMBBI == EndMBBI)
1259 return EndMBBI;
1260
1261 unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1262 if (Off) {
1263 Offset = Off;
1264 return NextMBBI;
1265 }
1266
1267 // SP can only be combined if it is the next instruction after the original
1268 // MBBI, otherwise we may be incrementing the stack pointer (invalidating
1269 // anything below the new pointer) when its frame elements are still in
1270 // use. Other registers can attempt to look further, until a different use
1271 // or def of the register is found.
1272 if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
1273 NextMBBI->definesRegister(Reg, TRI))
1274 return EndMBBI;
1275
1276 ++NextMBBI;
1277 }
1278 return EndMBBI;
1279}
1280
1281/// Fold proceeding/trailing inc/dec of base register into the
1282/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1283///
1284/// stmia rn, <ra, rb, rc>
1285/// rn := rn + 4 * 3;
1286/// =>
1287/// stmia rn!, <ra, rb, rc>
1288///
1289/// rn := rn - 4 * 3;
1290/// ldmia rn, <ra, rb, rc>
1291/// =>
1292/// ldmdb rn!, <ra, rb, rc>
1293bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1294 // Thumb1 is already using updating loads/stores.
1295 if (isThumb1) return false;
1296 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1297
1298 const MachineOperand &BaseOP = MI->getOperand(0);
1299 Register Base = BaseOP.getReg();
1300 bool BaseKill = BaseOP.isKill();
1301 Register PredReg;
1302 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1303 unsigned Opcode = MI->getOpcode();
1304 DebugLoc DL = MI->getDebugLoc();
1305
1306 // Can't use an updating ld/st if the base register is also a dest
1307 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1308 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1309 if (MO.getReg() == Base)
1310 return false;
1311
1312 int Bytes = getLSMultipleTransferSize(MI);
1313 MachineBasicBlock &MBB = *MI->getParent();
1315 int Offset;
1317 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1319 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1320 Mode = ARM_AM::db;
1321 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
1322 Mode = ARM_AM::da;
1323 } else {
1324 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1325 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1326 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
1327
1328 // We couldn't find an inc/dec to merge. But if the base is dead, we
1329 // can still change to a writeback form as that will save us 2 bytes
1330 // of code size. It can create WAW hazards though, so only do it if
1331 // we're minimizing code size.
1332 if (!STI->hasMinSize() || !BaseKill)
1333 return false;
1334
1335 bool HighRegsUsed = false;
1336 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1337 if (MO.getReg() >= ARM::R8) {
1338 HighRegsUsed = true;
1339 break;
1340 }
1341
1342 if (!HighRegsUsed)
1343 MergeInstr = MBB.end();
1344 else
1345 return false;
1346 }
1347 }
1348 if (MergeInstr != MBB.end()) {
1349 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1350 MBB.erase(MergeInstr);
1351 }
1352
1353 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1354 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1355 .addReg(Base, getDefRegState(true)) // WB base register
1356 .addReg(Base, getKillRegState(BaseKill))
1357 .addImm(Pred).addReg(PredReg);
1358
1359 // Transfer the rest of operands.
1360 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
1361 MIB.add(MO);
1362
1363 // Transfer memoperands.
1364 MIB.setMemRefs(MI->memoperands());
1365
1366 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1367 MBB.erase(MBBI);
1368 return true;
1369}
1370
1371static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1373 switch (Opc) {
1374 case ARM::LDRi12:
1375 return ARM::LDR_PRE_IMM;
1376 case ARM::STRi12:
1377 return ARM::STR_PRE_IMM;
1378 case ARM::VLDRS:
1379 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1380 case ARM::VLDRD:
1381 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1382 case ARM::VSTRS:
1383 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1384 case ARM::VSTRD:
1385 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1386 case ARM::t2LDRi8:
1387 case ARM::t2LDRi12:
1388 return ARM::t2LDR_PRE;
1389 case ARM::t2STRi8:
1390 case ARM::t2STRi12:
1391 return ARM::t2STR_PRE;
1392 default: llvm_unreachable("Unhandled opcode!");
1393 }
1394}
1395
1396static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1398 switch (Opc) {
1399 case ARM::LDRi12:
1400 return ARM::LDR_POST_IMM;
1401 case ARM::STRi12:
1402 return ARM::STR_POST_IMM;
1403 case ARM::VLDRS:
1404 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1405 case ARM::VLDRD:
1406 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1407 case ARM::VSTRS:
1408 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1409 case ARM::VSTRD:
1410 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1411 case ARM::t2LDRi8:
1412 case ARM::t2LDRi12:
1413 return ARM::t2LDR_POST;
1414 case ARM::t2LDRBi8:
1415 case ARM::t2LDRBi12:
1416 return ARM::t2LDRB_POST;
1417 case ARM::t2LDRSBi8:
1418 case ARM::t2LDRSBi12:
1419 return ARM::t2LDRSB_POST;
1420 case ARM::t2LDRHi8:
1421 case ARM::t2LDRHi12:
1422 return ARM::t2LDRH_POST;
1423 case ARM::t2LDRSHi8:
1424 case ARM::t2LDRSHi12:
1425 return ARM::t2LDRSH_POST;
1426 case ARM::t2STRi8:
1427 case ARM::t2STRi12:
1428 return ARM::t2STR_POST;
1429 case ARM::t2STRBi8:
1430 case ARM::t2STRBi12:
1431 return ARM::t2STRB_POST;
1432 case ARM::t2STRHi8:
1433 case ARM::t2STRHi12:
1434 return ARM::t2STRH_POST;
1435
1436 case ARM::MVE_VLDRBS16:
1437 return ARM::MVE_VLDRBS16_post;
1438 case ARM::MVE_VLDRBS32:
1439 return ARM::MVE_VLDRBS32_post;
1440 case ARM::MVE_VLDRBU16:
1441 return ARM::MVE_VLDRBU16_post;
1442 case ARM::MVE_VLDRBU32:
1443 return ARM::MVE_VLDRBU32_post;
1444 case ARM::MVE_VLDRHS32:
1445 return ARM::MVE_VLDRHS32_post;
1446 case ARM::MVE_VLDRHU32:
1447 return ARM::MVE_VLDRHU32_post;
1448 case ARM::MVE_VLDRBU8:
1449 return ARM::MVE_VLDRBU8_post;
1450 case ARM::MVE_VLDRHU16:
1451 return ARM::MVE_VLDRHU16_post;
1452 case ARM::MVE_VLDRWU32:
1453 return ARM::MVE_VLDRWU32_post;
1454 case ARM::MVE_VSTRB16:
1455 return ARM::MVE_VSTRB16_post;
1456 case ARM::MVE_VSTRB32:
1457 return ARM::MVE_VSTRB32_post;
1458 case ARM::MVE_VSTRH32:
1459 return ARM::MVE_VSTRH32_post;
1460 case ARM::MVE_VSTRBU8:
1461 return ARM::MVE_VSTRBU8_post;
1462 case ARM::MVE_VSTRHU16:
1463 return ARM::MVE_VSTRHU16_post;
1464 case ARM::MVE_VSTRWU32:
1465 return ARM::MVE_VSTRWU32_post;
1466
1467 default: llvm_unreachable("Unhandled opcode!");
1468 }
1469}
1470
1471/// Fold proceeding/trailing inc/dec of base register into the
1472/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1473bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1474 // Thumb1 doesn't have updating LDR/STR.
1475 // FIXME: Use LDM/STM with single register instead.
1476 if (isThumb1) return false;
1477 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1478
1480 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1481 unsigned Opcode = MI->getOpcode();
1482 DebugLoc DL = MI->getDebugLoc();
1483 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1484 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1485 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1486 if (isi32Load(Opcode) || isi32Store(Opcode))
1487 if (MI->getOperand(2).getImm() != 0)
1488 return false;
1489 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1490 return false;
1491
1492 // Can't do the merge if the destination register is the same as the would-be
1493 // writeback register.
1494 if (MI->getOperand(0).getReg() == Base)
1495 return false;
1496
1497 Register PredReg;
1498 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1499 int Bytes = getLSMultipleTransferSize(MI);
1500 MachineBasicBlock &MBB = *MI->getParent();
1502 int Offset;
1504 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1505 unsigned NewOpc;
1506 if (!isAM5 && Offset == Bytes) {
1507 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1508 } else if (Offset == -Bytes) {
1509 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1510 } else {
1511 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1512 if (MergeInstr == MBB.end())
1513 return false;
1514
1516 if ((isAM5 && Offset != Bytes) ||
1517 (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
1519 if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
1520 return false;
1521 }
1522 }
1523 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1524 MBB.erase(MergeInstr);
1525
1527
1528 bool isLd = isLoadSingle(Opcode);
1529 if (isAM5) {
1530 // VLDM[SD]_UPD, VSTM[SD]_UPD
1531 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1532 // updating load/store-multiple instructions can be used with only one
1533 // register.)
1534 MachineOperand &MO = MI->getOperand(0);
1535 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1536 .addReg(Base, getDefRegState(true)) // WB base register
1537 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1538 .addImm(Pred)
1539 .addReg(PredReg)
1540 .addReg(MO.getReg(), (isLd ? getDefRegState(true)
1541 : getKillRegState(MO.isKill())))
1542 .cloneMemRefs(*MI);
1543 (void)MIB;
1544 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1545 } else if (isLd) {
1546 if (isAM2) {
1547 // LDR_PRE, LDR_POST
1548 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1549 auto MIB =
1550 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1551 .addReg(Base, RegState::Define)
1552 .addReg(Base)
1553 .addImm(Offset)
1554 .addImm(Pred)
1555 .addReg(PredReg)
1556 .cloneMemRefs(*MI);
1557 (void)MIB;
1558 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1559 } else {
1561 auto MIB =
1562 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1563 .addReg(Base, RegState::Define)
1564 .addReg(Base)
1565 .addReg(0)
1566 .addImm(Imm)
1567 .add(predOps(Pred, PredReg))
1568 .cloneMemRefs(*MI);
1569 (void)MIB;
1570 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1571 }
1572 } else {
1573 // t2LDR_PRE, t2LDR_POST
1574 auto MIB =
1575 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1576 .addReg(Base, RegState::Define)
1577 .addReg(Base)
1578 .addImm(Offset)
1579 .add(predOps(Pred, PredReg))
1580 .cloneMemRefs(*MI);
1581 (void)MIB;
1582 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1583 }
1584 } else {
1585 MachineOperand &MO = MI->getOperand(0);
1586 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1587 // the vestigial zero-reg offset register. When that's fixed, this clause
1588 // can be removed entirely.
1589 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1591 // STR_PRE, STR_POST
1592 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1593 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1594 .addReg(Base)
1595 .addReg(0)
1596 .addImm(Imm)
1597 .add(predOps(Pred, PredReg))
1598 .cloneMemRefs(*MI);
1599 (void)MIB;
1600 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1601 } else {
1602 // t2STR_PRE, t2STR_POST
1603 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1604 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1605 .addReg(Base)
1606 .addImm(Offset)
1607 .add(predOps(Pred, PredReg))
1608 .cloneMemRefs(*MI);
1609 (void)MIB;
1610 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1611 }
1612 }
1613 MBB.erase(MBBI);
1614
1615 return true;
1616}
1617
1618bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1619 unsigned Opcode = MI.getOpcode();
1620 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1621 "Must have t2STRDi8 or t2LDRDi8");
1622 if (MI.getOperand(3).getImm() != 0)
1623 return false;
1624 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
1625
1626 // Behaviour for writeback is undefined if base register is the same as one
1627 // of the others.
1628 const MachineOperand &BaseOp = MI.getOperand(2);
1629 Register Base = BaseOp.getReg();
1630 const MachineOperand &Reg0Op = MI.getOperand(0);
1631 const MachineOperand &Reg1Op = MI.getOperand(1);
1632 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1633 return false;
1634
1635 Register PredReg;
1636 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1638 MachineBasicBlock &MBB = *MI.getParent();
1639 int Offset;
1641 PredReg, Offset);
1642 unsigned NewOpc;
1643 if (Offset == 8 || Offset == -8) {
1644 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1645 } else {
1646 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1647 if (MergeInstr == MBB.end())
1648 return false;
1649 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1650 if (!isLegalAddressImm(NewOpc, Offset, TII))
1651 return false;
1652 }
1653 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1654 MBB.erase(MergeInstr);
1655
1656 DebugLoc DL = MI.getDebugLoc();
1657 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
1658 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1659 MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
1660 } else {
1661 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1662 MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
1663 }
1664 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1665 .addImm(Offset).addImm(Pred).addReg(PredReg);
1666 assert(TII->get(Opcode).getNumOperands() == 6 &&
1667 TII->get(NewOpc).getNumOperands() == 7 &&
1668 "Unexpected number of operands in Opcode specification.");
1669
1670 // Transfer implicit operands.
1671 for (const MachineOperand &MO : MI.implicit_operands())
1672 MIB.add(MO);
1673 MIB.cloneMemRefs(MI);
1674
1675 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1676 MBB.erase(MBBI);
1677 return true;
1678}
1679
1680/// Returns true if instruction is a memory operation that this pass is capable
1681/// of operating on.
1682static bool isMemoryOp(const MachineInstr &MI) {
1683 unsigned Opcode = MI.getOpcode();
1684 switch (Opcode) {
1685 case ARM::VLDRS:
1686 case ARM::VSTRS:
1687 case ARM::VLDRD:
1688 case ARM::VSTRD:
1689 case ARM::LDRi12:
1690 case ARM::STRi12:
1691 case ARM::tLDRi:
1692 case ARM::tSTRi:
1693 case ARM::tLDRspi:
1694 case ARM::tSTRspi:
1695 case ARM::t2LDRi8:
1696 case ARM::t2LDRi12:
1697 case ARM::t2STRi8:
1698 case ARM::t2STRi12:
1699 break;
1700 default:
1701 return false;
1702 }
1703 if (!MI.getOperand(1).isReg())
1704 return false;
1705
1706 // When no memory operands are present, conservatively assume unaligned,
1707 // volatile, unfoldable.
1708 if (!MI.hasOneMemOperand())
1709 return false;
1710
1711 const MachineMemOperand &MMO = **MI.memoperands_begin();
1712
1713 // Don't touch volatile memory accesses - we may be changing their order.
1714 // TODO: We could allow unordered and monotonic atomics here, but we need to
1715 // make sure the resulting ldm/stm is correctly marked as atomic.
1716 if (MMO.isVolatile() || MMO.isAtomic())
1717 return false;
1718
1719 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1720 // not.
1721 if (MMO.getAlign() < Align(4))
1722 return false;
1723
1724 // str <undef> could probably be eliminated entirely, but for now we just want
1725 // to avoid making a mess of it.
1726 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1727 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1728 return false;
1729
1730 // Likewise don't mess with references to undefined addresses.
1731 if (MI.getOperand(1).isUndef())
1732 return false;
1733
1734 return true;
1735}
1736
1739 bool isDef, unsigned NewOpc, unsigned Reg,
1740 bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1741 bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
1742 unsigned PredReg, const TargetInstrInfo *TII,
1743 MachineInstr *MI) {
1744 if (isDef) {
1745 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1746 TII->get(NewOpc))
1747 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1748 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1749 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1750 // FIXME: This is overly conservative; the new instruction accesses 4
1751 // bytes, not 8.
1752 MIB.cloneMemRefs(*MI);
1753 } else {
1754 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1755 TII->get(NewOpc))
1756 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1757 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1758 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1759 // FIXME: This is overly conservative; the new instruction accesses 4
1760 // bytes, not 8.
1761 MIB.cloneMemRefs(*MI);
1762 }
1763}
1764
1765bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1767 MachineInstr *MI = &*MBBI;
1768 unsigned Opcode = MI->getOpcode();
1769 // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
1770 // if we see this opcode.
1771 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1772 return false;
1773
1774 const MachineOperand &BaseOp = MI->getOperand(2);
1775 Register BaseReg = BaseOp.getReg();
1776 Register EvenReg = MI->getOperand(0).getReg();
1777 Register OddReg = MI->getOperand(1).getReg();
1778 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1779 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1780
1781 // ARM errata 602117: LDRD with base in list may result in incorrect base
1782 // register when interrupted or faulted.
1783 bool Errata602117 = EvenReg == BaseReg &&
1784 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1785 // ARM LDRD/STRD needs consecutive registers.
1786 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1787 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1788
1789 if (!Errata602117 && !NonConsecutiveRegs)
1790 return false;
1791
1792 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1793 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1794 bool EvenDeadKill = isLd ?
1795 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1796 bool EvenUndef = MI->getOperand(0).isUndef();
1797 bool OddDeadKill = isLd ?
1798 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1799 bool OddUndef = MI->getOperand(1).isUndef();
1800 bool BaseKill = BaseOp.isKill();
1801 bool BaseUndef = BaseOp.isUndef();
1802 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1803 "register offset not handled below");
1804 int OffImm = getMemoryOpOffset(*MI);
1805 Register PredReg;
1806 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1807
1808 if (OddRegNum > EvenRegNum && OffImm == 0) {
1809 // Ascending register numbers and no offset. It's safe to change it to a
1810 // ldm or stm.
1811 unsigned NewOpc = (isLd)
1812 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1813 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1814 if (isLd) {
1815 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1816 .addReg(BaseReg, getKillRegState(BaseKill))
1817 .addImm(Pred).addReg(PredReg)
1818 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1819 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
1820 .cloneMemRefs(*MI);
1821 ++NumLDRD2LDM;
1822 } else {
1823 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1824 .addReg(BaseReg, getKillRegState(BaseKill))
1825 .addImm(Pred).addReg(PredReg)
1826 .addReg(EvenReg,
1827 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1828 .addReg(OddReg,
1829 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1830 .cloneMemRefs(*MI);
1831 ++NumSTRD2STM;
1832 }
1833 } else {
1834 // Split into two instructions.
1835 unsigned NewOpc = (isLd)
1836 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1837 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1838 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1839 // so adjust and use t2LDRi12 here for that.
1840 unsigned NewOpc2 = (isLd)
1841 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1842 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1843 // If this is a load, make sure the first load does not clobber the base
1844 // register before the second load reads it.
1845 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
1846 assert(!TRI->regsOverlap(OddReg, BaseReg));
1847 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1848 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1849 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1850 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1851 MI);
1852 } else {
1853 if (OddReg == EvenReg && EvenDeadKill) {
1854 // If the two source operands are the same, the kill marker is
1855 // probably on the first one. e.g.
1856 // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1857 EvenDeadKill = false;
1858 OddDeadKill = true;
1859 }
1860 // Never kill the base register in the first instruction.
1861 if (EvenReg == BaseReg)
1862 EvenDeadKill = false;
1863 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1864 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1865 MI);
1866 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1867 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1868 MI);
1869 }
1870 if (isLd)
1871 ++NumLDRD2LDR;
1872 else
1873 ++NumSTRD2STR;
1874 }
1875
1876 MBBI = MBB.erase(MBBI);
1877 return true;
1878}
1879
1880/// An optimization pass to turn multiple LDR / STR ops of the same base and
1881/// incrementing offset into LDM / STM ops.
1882bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1883  MemOpQueue MemOps;
1884  unsigned CurrBase = 0;
1885  unsigned CurrOpc = ~0u;
1886  ARMCC::CondCodes CurrPred = ARMCC::AL;
1887  unsigned Position = 0;
1888  assert(Candidates.size() == 0);
1889  assert(MergeBaseCandidates.size() == 0);
1890  LiveRegsValid = false;
1891
  // Walk the block from bottom to top: MBBI is set to the instruction just
  // before I, and the next iteration resumes from MBBI, so an op that breaks
  // the current chain gets re-examined as the start of a new chain.
1893       I = MBBI) {
1894    // The instruction in front of the iterator is the one we look at.
1895    MBBI = std::prev(I);
1896    if (FixInvalidRegPairOp(MBB, MBBI))
1897      continue;
1898    ++Position;
1899
1900    if (isMemoryOp(*MBBI)) {
1901      unsigned Opcode = MBBI->getOpcode();
1902      const MachineOperand &MO = MBBI->getOperand(0);
1903      Register Reg = MO.getReg();
1905      Register PredReg;
1906      ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
1908      if (CurrBase == 0) {
1909        // Start of a new chain.
1910        CurrBase = Base;
1911        CurrOpc = Opcode;
1912        CurrPred = Pred;
1913        MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1914        continue;
1915      }
1916      // Note: No need to match PredReg in the next if.
1917      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1918        // Watch out for:
1919        //   r4 := ldr [r0, #8]
1920        //   r4 := ldr [r0, #4]
1921        // or
1922        //   r0 := ldr [r0]
1923        // If a load overrides the base register or a register loaded by
1924        // another load in our chain, we cannot take this instruction.
1925        bool Overlap = false;
1926        if (isLoadSingle(Opcode)) {
1927          Overlap = (Base == Reg);
1928          if (!Overlap) {
1929            for (const MemOpQueueEntry &E : MemOps) {
1930              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1931                Overlap = true;
1932                break;
1933              }
1934            }
1935          }
1936        }
1937
1938        if (!Overlap) {
1939          // Check offset and sort memory operation into the current chain.
          // Fast path: offsets usually arrive in increasing order; otherwise
          // keep MemOps sorted by offset via an insertion scan.
1940          if (Offset > MemOps.back().Offset) {
1941            MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1942            continue;
1943          } else {
1944            MemOpQueue::iterator MI, ME;
1945            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1946              if (Offset < MI->Offset) {
1947                // Found a place to insert.
1948                break;
1949              }
1950              if (Offset == MI->Offset) {
1951                // Collision, abort.
1952                MI = ME;
1953                break;
1954              }
1955            }
1956            if (MI != MemOps.end()) {
1957              MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1958              continue;
1959            }
1960          }
1961        }
1962      }
1963
1964      // Don't advance the iterator; The op will start a new chain next.
1965      MBBI = I;
1966      --Position;
1967      // Fallthrough to look into existing chain.
1968    } else if (MBBI->isDebugInstr()) {
1969      continue;
1970    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1971               MBBI->getOpcode() == ARM::t2STRDi8) {
1972      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1973      // remember them because we may still be able to merge add/sub into them.
1974      MergeBaseCandidates.push_back(&*MBBI);
1975    }
1976
1977    // If we are here then the chain is broken; Extract candidates for a merge.
1978    if (MemOps.size() > 0) {
1979      FormCandidates(MemOps);
1980      // Reset for the next chain.
1981      CurrBase = 0;
1982      CurrOpc = ~0u;
1983      CurrPred = ARMCC::AL;
1984      MemOps.clear();
1985    }
1986  }
  // Flush the final chain (if the walk ended while one was still open).
1987  if (MemOps.size() > 0)
1988    FormCandidates(MemOps);
1989
1990  // Sort candidates so they get processed from end to begin of the basic
1991  // block later; This is necessary for liveness calculation.
1992  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1993    return M0->InsertPos < M1->InsertPos;
1994  };
1995  llvm::sort(Candidates, LessThan);
1996
1997  // Go through list of candidates and merge.
1998  bool Changed = false;
1999  for (const MergeCandidate *Candidate : Candidates) {
2000    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
2001      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
2002      // Merge preceding/trailing base inc/dec into the merged op.
2003      if (Merged) {
2004        Changed = true;
2005        unsigned Opcode = Merged->getOpcode();
2006        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2007          MergeBaseUpdateLSDouble(*Merged);
2008        else
2009          MergeBaseUpdateLSMultiple(Merged);
2010      } else {
        // No multi/double was formed: still try to fold a base update into
        // each individual load/store of the candidate.
2011        for (MachineInstr *MI : Candidate->Instrs) {
2012          if (MergeBaseUpdateLoadStore(MI))
2013            Changed = true;
2014        }
2015      }
2016    } else {
2017      assert(Candidate->Instrs.size() == 1);
2018      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2019        Changed = true;
2020    }
2021  }
2022  Candidates.clear();
2023  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
2024  for (MachineInstr *MI : MergeBaseCandidates)
2025    MergeBaseUpdateLSDouble(*MI);
2026  MergeBaseCandidates.clear();
2027
2028  return Changed;
2029}
2030
2031/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
2032/// into the preceding stack restore so it directly restore the value of LR
2033/// into pc.
2034///   ldmfd sp!, {..., lr}
2035///   bx lr
2036/// or
2037///   ldmfd sp!, {..., lr}
2038///   mov pc, lr
2039/// =>
2040///   ldmfd sp!, {..., pc}
2041bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
2042  // Thumb1 LDM doesn't allow high registers.
2043  if (isThumb1) return false;
2044  if (MBB.empty()) return false;
2045
2047  if (MBBI != MBB.begin() && MBBI != MBB.end() &&
2048      (MBBI->getOpcode() == ARM::BX_RET ||
2049       MBBI->getOpcode() == ARM::tBX_RET ||
2050       MBBI->getOpcode() == ARM::MOVPCLR)) {
2051    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
2052    // Ignore any debug instructions.
2053    while (PrevI->isDebugInstr() && PrevI != MBB.begin())
2054      --PrevI;
2055    MachineInstr &PrevMI = *PrevI;
2056    unsigned Opcode = PrevMI.getOpcode();
2057    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2058        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2059        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The register list is at the tail of the operand list; LR must be
      // the last (highest) register loaded for the rewrite to be valid.
2060      MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
2061      if (MO.getReg() != ARM::LR)
2062        return false;
2063      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      // NOTE(review): the assert documents that when LR is the final reg,
      // only the IA with-writeback forms are expected here.
2064      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2065              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite "ldm sp!, {..., lr}" into the _RET form loading PC, carry
      // over the return's implicit operands, and delete the return.
2066      PrevMI.setDesc(TII->get(NewOpc));
2067      MO.setReg(ARM::PC);
2068      PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
2069      MBB.erase(MBBI);
2070      return true;
2071    }
2072  }
2073  return false;
2074}
2075
/// Fold a Thumb1 "tMOVr lr, rX" immediately followed by "tBX_RET" into a
/// single "tBX rX", erasing both original instructions. Returns true if the
/// rewrite happened. Requires v4T for tBX (asserted below).
2076bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
2078  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
2079      MBBI->getOpcode() != ARM::tBX_RET)
2080    return false;
2081
2083  --Prev;
2084  if (Prev->getOpcode() != ARM::tMOVr ||
2085      !Prev->definesRegister(ARM::LR, /*TRI=*/nullptr))
2086    return false;
2087
  // Find the source register of the move (the operand carrying the kill
  // flag) and branch through it directly.
2088  for (auto Use : Prev->uses())
2089    if (Use.isKill()) {
2090      assert(STI->hasV4TOps());
2091      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
2092          .addReg(Use.getReg(), RegState::Kill)
2095      MBB.erase(MBBI);
2096      MBB.erase(Prev);
2097      return true;
2098    }
2099
2100  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
2101}
2102
/// Entry point of the post-RA load/store optimizer: caches subtarget info,
/// then per block forms LDM/STM (LoadStoreMultipleOpti), folds returns into
/// LDMs (MergeReturnIntoLDM), and combines mov+bx on Thumb1 (CombineMovBx).
2103bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2104  MF = &Fn;
2105  STI = &Fn.getSubtarget<ARMSubtarget>();
2106  TL = STI->getTargetLowering();
2107  AFI = Fn.getInfo<ARMFunctionInfo>();
2108  TII = STI->getInstrInfo();
2109  TRI = STI->getRegisterInfo();
2110
2111  RegClassInfoValid = false;
2112  isThumb2 = AFI->isThumb2Function();
2113  isThumb1 = AFI->isThumbFunction() && !isThumb2;
2114
  // MergeReturnIntoLDM results are tracked separately so the LR-restore
  // recomputation below only runs when a return was actually merged.
2115  bool Modified = false, ModifiedLDMReturn = false;
2116  for (MachineBasicBlock &MBB : Fn) {
2117    Modified |= LoadStoreMultipleOpti(MBB);
    // Loading into PC requires v5T; skip when return addresses are signed.
2118    if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
2119      ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
2120    if (isThumb1)
2121      Modified |= CombineMovBx(MBB);
2122  }
2123  Modified |= ModifiedLDMReturn;
2124
2125  // If we merged a BX instruction into an LDM, we need to re-calculate whether
2126  // LR is restored. This check needs to consider the whole function, not just
2127  // the instruction(s) we changed, because there may be other BX returns which
2128  // still need LR to be restored.
2129  if (ModifiedLDMReturn)
2131
2132  Allocator.DestroyAll();
2133  return Modified;
2134}
2135
2136bool ARMLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
2137 if (skipFunction(MF.getFunction()))
2138 return false;
2139 ARMLoadStoreOpt Impl;
2140 return Impl.runOnMachineFunction(MF);
2141}
2142
2143#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2144 "ARM pre- register allocation load / store optimization pass"
2145
2146namespace {
2147
2148/// Pre-register-allocation pass that moves loads / stores of consecutive
2149/// locations closer together to make it more likely they will be combined later.
2150struct ARMPreAllocLoadStoreOpt {
  // Cached per-run pointers to target/function state, set up in
  // runOnMachineFunction before any rescheduling is done.
2152  const DataLayout *TD;
2153  const TargetInstrInfo *TII;
2154  const TargetRegisterInfo *TRI;
2155  const ARMSubtarget *STI;
2158  MachineFunction *MF;
2159
  // Entry point; analyses (alias analysis, dominator tree) are injected by
  // the caller so both pass-manager flavors can share this implementation.
2160  bool runOnMachineFunction(MachineFunction &Fn, AliasAnalysis *AA,
2162
2163private:
  // Checks whether Op0/Op1 can be combined into a single LDRD/STRD and, if
  // so, reports the new opcode, registers, and encoded offset via out-params.
2164  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2165                        unsigned &NewOpc, Register &EvenReg, Register &OddReg,
2166                        Register &BaseReg, int &Offset, Register &PredReg,
2167                        ARMCC::CondCodes &Pred, bool &isT2);
  // Moves a group of same-base loads/stores next to each other (forming
  // LDRD/STRD pairs when possible); RegisterMap records moved load results
  // so DBG_VALUEs can be re-placed afterwards.
2168  bool RescheduleOps(
2170      unsigned Base, bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2172  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2173  bool DistributeIncrements();
2174  bool DistributeIncrements(Register Base);
2175};
2176
/// Thin legacy-pass-manager wrapper around ARMPreAllocLoadStoreOpt; its
/// runOnMachineFunction (defined below) fetches the required analyses and
/// forwards to the shared implementation.
2177struct ARMPreAllocLoadStoreOptLegacy : public MachineFunctionPass {
2178  static char ID;
2179
2180  ARMPreAllocLoadStoreOptLegacy() : MachineFunctionPass(ID) {}
2181
2182  bool runOnMachineFunction(MachineFunction &Fn) override;
2183
2184  StringRef getPassName() const override {
2186  }
2187
2188  void getAnalysisUsage(AnalysisUsage &AU) const override {
2193  }
2194};
2195
// Pass identification token used by the legacy pass registry.
2196char ARMPreAllocLoadStoreOptLegacy::ID = 0;
2198} // end anonymous namespace
2199
2200INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOptLegacy, "arm-prera-ldst-opt",
2203INITIALIZE_PASS_END(ARMPreAllocLoadStoreOptLegacy, "arm-prera-ldst-opt",
2205
2206// Limit the number of instructions to be rescheduled.
2207// FIXME: tune this limit, and/or come up with some better heuristics.
// Hidden tuning knob consumed by RescheduleOps; at most this many
// loads/stores are moved as one group (default 8).
2208static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
2209                                         cl::init(8), cl::Hidden);
2210
/// Shared pre-RA entry point: caches target/function state and the injected
/// analyses, distributes postinc increments, then reschedules loads/stores
/// block by block. Returns true if anything changed.
2211bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn,
2212                                                   AliasAnalysis *AAIn,
2213                                                   MachineDominatorTree *DTIn) {
2215    return false;
2216
  // Stash the analyses and target hooks used by the helpers below.
2217  AA = AAIn;
2218  DT = DTIn;
2219  TD = &Fn.getDataLayout();
2220  STI = &Fn.getSubtarget<ARMSubtarget>();
2221  TII = STI->getInstrInfo();
2222  TRI = STI->getRegisterInfo();
2223  MRI = &Fn.getRegInfo();
2224  MF = &Fn;
2225
2226  bool Modified = DistributeIncrements();
2227  for (MachineBasicBlock &MFI : Fn)
2228    Modified |= RescheduleLoadStoreInstrs(&MFI);
2229
2230  return Modified;
2231}
2232
2233bool ARMPreAllocLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &Fn) {
2234 if (skipFunction(Fn.getFunction()))
2235 return false;
2236
2237 ARMPreAllocLoadStoreOpt Impl;
2238 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2239 MachineDominatorTree *DT =
2240 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2241 return Impl.runOnMachineFunction(Fn, AA, DT);
2242}
2243
/// Decide whether the memory ops in MemOps can be moved together past the
/// instructions between I and E without changing behavior (no barriers, no
/// aliasing stores/loads, no redefinition of Base), and whether doing so
/// keeps the estimated register-pressure increase acceptable.
2244static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2248                                      SmallSet<unsigned, 4> &MemRegs,
2249                                      const TargetRegisterInfo *TRI,
2250                                      AliasAnalysis *AA) {
2251  // Are there stores / loads / calls between them?
2252  SmallSet<unsigned, 4> AddedRegPressure;
2253  while (++I != E) {
2254    if (I->isDebugInstr() || MemOps.count(&*I))
2255      continue;
    // Calls, terminators and side-effecting instructions are hard barriers.
2256    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2257      return false;
    // A store (or, when moving stores, any load) between the ops must not
    // alias any of the ops being moved.
2258    if (I->mayStore() || (!isLd && I->mayLoad()))
2259      for (MachineInstr *MemOp : MemOps)
2260        if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2261          return false;
2262    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2263      MachineOperand &MO = I->getOperand(j);
2264      if (!MO.isReg())
2265        continue;
2266      Register Reg = MO.getReg();
      // The base register must stay unchanged over the whole span.
2267      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2268        return false;
2269      if (Reg != Base && !MemRegs.count(Reg))
2270        AddedRegPressure.insert(Reg);
2271    }
2272  }
2273
2274  // Estimate register pressure increase due to the transformation.
2275  if (MemRegs.size() <= 4)
2276    // Ok if we are moving small number of instructions.
2277    return true;
2278  return AddedRegPressure.size() <= MemRegs.size() * 2;
2279}
2280
2281bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2282    MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
2283    Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
2284    Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
2285  // Make sure we're allowed to generate LDRD/STRD.
2286  if (!STI->hasV5TEOps())
2287    return false;
2288
2289  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Map the single-word opcode onto its doubleword counterpart. Thumb2
  // LDRD/STRD use a scaled 8-bit immediate (scale 4); ARM uses addrmode3.
2290  unsigned Scale = 1;
2291  unsigned Opcode = Op0->getOpcode();
2292  if (Opcode == ARM::LDRi12) {
2293    NewOpc = ARM::LDRD;
2294  } else if (Opcode == ARM::STRi12) {
2295    NewOpc = ARM::STRD;
2296  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2297    NewOpc = ARM::t2LDRDi8;
2298    Scale = 4;
2299    isT2 = true;
2300  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2301    NewOpc = ARM::t2STRDi8;
2302    Scale = 4;
2303    isT2 = true;
2304  } else {
2305    return false;
2306  }
2307
2308  // Make sure the base address satisfies i64 ld / st alignment requirement.
2309  // At the moment, we ignore the memoryoperand's value.
2310  // If we want to use AliasAnalysis, we should check it accordingly.
2311  if (!Op0->hasOneMemOperand() ||
2312      (*Op0->memoperands_begin())->isVolatile() ||
2313      (*Op0->memoperands_begin())->isAtomic())
2314    return false;
2315
2316  Align Alignment = (*Op0->memoperands_begin())->getAlign();
2317  Align ReqAlign = STI->getDualLoadStoreAlignment();
2318  if (Alignment < ReqAlign)
2319    return false;
2320
2321  // Then make sure the immediate offset fits.
2322  int OffImm = getMemoryOpOffset(*Op0);
2323  if (isT2) {
2324    int Limit = (1 << 8) * Scale;
2325    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2326      return false;
2327    Offset = OffImm;
2328  } else {
    // ARM addrmode3 carries the sign separately, so negate and encode the
    // add/sub flag together with the magnitude.
2330    if (OffImm < 0) {
2332      OffImm = - OffImm;
2333    }
2334    int Limit = (1 << 8) * Scale;
2335    if (OffImm >= Limit || (OffImm & (Scale-1)))
2336      return false;
2337    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2338  }
  // LDRD/STRD need two distinct transfer registers.
2339  FirstReg = Op0->getOperand(0).getReg();
2340  SecondReg = Op1->getOperand(0).getReg();
2341  if (FirstReg == SecondReg)
2342    return false;
2343  BaseReg = Op0->getOperand(1).getReg();
2344  Pred = getInstrPredicate(*Op0, PredReg);
2345  dl = Op0->getDebugLoc();
2346  return true;
2347}
2348
/// Group same-base loads/stores with contiguous offsets and move each group
/// next to each other (loads up to the first op, stores down to the last),
/// forming an LDRD/STRD directly when exactly two ops qualify. Moved load
/// results are recorded in RegisterMap for later DBG_VALUE repair.
2349bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2350    MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
2351    bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2352    SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap) {
2353  bool RetVal = false;
2354
2355  // Sort by offset (in reverse order).
2356  llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
2357    int LOffset = getMemoryOpOffset(*LHS);
2358    int ROffset = getMemoryOpOffset(*RHS);
2359    assert(LHS == RHS || LOffset != ROffset);
2360    return LOffset > ROffset;
2361  });
2362
2363  // The loads / stores of the same base are in order. Scan them from first to
2364  // last and check for the following:
2365  // 1. Any def of base.
2366  // 2. Any gaps.
2367  while (Ops.size() > 1) {
2368    unsigned FirstLoc = ~0U;
2369    unsigned LastLoc = 0;
2370    MachineInstr *FirstOp = nullptr;
2371    MachineInstr *LastOp = nullptr;
2372    int LastOffset = 0;
2373    unsigned LastOpcode = 0;
2374    unsigned LastBytes = 0;
2375    unsigned NumMove = 0;
    // Because Ops is sorted in descending offset order, reverse iteration
    // visits offsets in ascending order.
2376    for (MachineInstr *Op : llvm::reverse(Ops)) {
2377      // Make sure each operation has the same kind.
2378      unsigned LSMOpcode
2379        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2380      if (LastOpcode && LSMOpcode != LastOpcode)
2381        break;
2382
2383      // Check that we have a continuous set of offsets.
2384      int Offset = getMemoryOpOffset(*Op);
2385      unsigned Bytes = getLSMultipleTransferSize(Op);
2386      if (LastBytes) {
2387        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2388          break;
2389      }
2390
2391      // Don't try to reschedule too many instructions.
2392      if (NumMove == InstReorderLimit)
2393        break;
2394
2395      // Found a mergeable instruction; save information about it.
2396      ++NumMove;
2397      LastOffset = Offset;
2398      LastBytes = Bytes;
2399      LastOpcode = LSMOpcode;
2400
      // Track the earliest and latest group member by block position.
2401      unsigned Loc = MI2LocMap[Op];
2402      if (Loc <= FirstLoc) {
2403        FirstLoc = Loc;
2404        FirstOp = Op;
2405      }
2406      if (Loc >= LastLoc) {
2407        LastLoc = Loc;
2408        LastOp = Op;
2409      }
2410    }
2411
2412    if (NumMove <= 1)
2413      Ops.pop_back();
2414    else {
2415      SmallPtrSet<MachineInstr*, 4> MemOps;
2416      SmallSet<unsigned, 4> MemRegs;
2417      for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2418        MemOps.insert(Ops[i]);
2419        MemRegs.insert(Ops[i]->getOperand(0).getReg());
2420      }
2421
2422      // Be conservative, if the instructions are too far apart, don't
2423      // move them. We want to limit the increase of register pressure.
2424      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2425      if (DoMove)
2426        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2427                                           MemOps, MemRegs, TRI, AA);
2428      if (!DoMove) {
2429        for (unsigned i = 0; i != NumMove; ++i)
2430          Ops.pop_back();
2431      } else {
2432        // This is the new location for the loads / stores.
        // Loads sink to the first member; stores hoist to the last one, then
        // skip past group members and debug instructions already there.
2433        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2434        while (InsertPos != MBB->end() &&
2435               (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2436          ++InsertPos;
2437
2438        // If we are moving a pair of loads / stores, see if it makes sense
2439        // to try to allocate a pair of registers that can form register pairs.
2440        MachineInstr *Op0 = Ops.back();
2441        MachineInstr *Op1 = Ops[Ops.size()-2];
2442        Register FirstReg, SecondReg;
2443        Register BaseReg, PredReg;
2445        bool isT2 = false;
2446        unsigned NewOpc = 0;
2447        int Offset = 0;
2448        DebugLoc dl;
2449        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2450                                             FirstReg, SecondReg, BaseReg,
2451                                             Offset, PredReg, Pred, isT2)) {
2452          Ops.pop_back();
2453          Ops.pop_back();
2454
          // Constrain both transfer registers to the class LDRD/STRD needs.
2455          const MCInstrDesc &MCID = TII->get(NewOpc);
2456          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
2457          MRI->constrainRegClass(FirstReg, TRC);
2458          MRI->constrainRegClass(SecondReg, TRC);
2459
2460          // Form the pair instruction.
2461          if (isLd) {
2462            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2463                                          .addReg(FirstReg, RegState::Define)
2464                                          .addReg(SecondReg, RegState::Define)
2465                                          .addReg(BaseReg);
2466            // FIXME: We're converting from LDRi12 to an insn that still
2467            // uses addrmode2, so we need an explicit offset reg. It should
2468            // always by reg0 since we're transforming LDRi12s.
2469            if (!isT2)
2470              MIB.addReg(0);
2471            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2472            MIB.cloneMergedMemRefs({Op0, Op1});
2473            LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2474            ++NumLDRDFormed;
2475          } else {
2476            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2477                                          .addReg(FirstReg)
2478                                          .addReg(SecondReg)
2479                                          .addReg(BaseReg);
2480            // FIXME: We're converting from LDRi12 to an insn that still
2481            // uses addrmode2, so we need an explicit offset reg. It should
2482            // always by reg0 since we're transforming STRi12s.
2483            if (!isT2)
2484              MIB.addReg(0);
2485            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2486            MIB.cloneMergedMemRefs({Op0, Op1});
2487            LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2488            ++NumSTRDFormed;
2489          }
2490          MBB->erase(Op0);
2491          MBB->erase(Op1);
2492
2493          if (!isT2) {
2494            // Add register allocation hints to form register pairs.
2495            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2496            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2497          }
2498        } else {
          // Couldn't pair: just splice each group member to InsertPos.
2499          for (unsigned i = 0; i != NumMove; ++i) {
2500            MachineInstr *Op = Ops.pop_back_val();
2501            if (isLd) {
2502              // Populate RegisterMap with all Registers defined by loads.
2503              Register Reg = Op->getOperand(0).getReg();
2504              RegisterMap[Reg];
2505            }
2506
2507            MBB->splice(InsertPos, MBB, Op);
2508          }
2509        }
2510
2511        NumLdStMoved += NumMove;
2512        RetVal = true;
2513      }
2514    }
2515  }
2516
2517  return RetVal;
2518}
2519
// Invoke Fn on every register operand of a debug instruction: operand 0 for
// a plain DBG_VALUE, and each register among the location operands of a
// DBG_VALUE_LIST (which start at operand index 2, after the variable and
// expression operands).
2521                                 std::function<void(MachineOperand &)> Fn) {
2522  if (MI->isNonListDebugValue()) {
2523    auto &Op = MI->getOperand(0);
2524    if (Op.isReg())
2525      Fn(Op);
2526  } else {
2527    for (unsigned I = 2; I < MI->getNumOperands(); I++) {
2528      auto &Op = MI->getOperand(I);
2529      if (Op.isReg())
2530        Fn(Op);
2531    }
2532  }
2533}
2534
2535// Update the RegisterMap with the instruction that was moved because a
2536// DBG_VALUE_LIST may need to be moved again.
2539    MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace) {
2540
  // For each register the DBG_VALUE_LIST refers to, swap the stale pointer
  // (InstrToReplace) for the moved instruction in that register's entry.
2541  forEachDbgRegOperand(DbgValueListInstr, [&](MachineOperand &Op) {
2542    auto RegIt = RegisterMap.find(Op.getReg());
2543    if (RegIt == RegisterMap.end())
2544      return;
2545    auto &InstrVec = RegIt->getSecond();
2546    llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
2547  });
2548}
2549
// Build the DebugVariable key (variable, expression, inlined-at location)
// identifying which source variable the debug instruction MI describes.
2551  auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
2552                              MI->getDebugLoc()->getInlinedAt());
2553  return DbgVar;
2554}
2555
// Reschedule the single loads/stores of one basic block so that accesses off
// the same base register end up adjacent (via RescheduleOps), then repair the
// placement of DBG_VALUE / DBG_VALUE_LIST instructions for any loads that
// were sunk past their debug uses. Returns true if anything changed.
2556bool
2557ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2558  bool RetVal = false;
2559
2560  DenseMap<MachineInstr *, unsigned> MI2LocMap;
2561  using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2562  using BaseVec = SmallVector<unsigned, 4>;
2563  Base2InstMap Base2LdsMap;
2564  Base2InstMap Base2StsMap;
2565  BaseVec LdBases;
2566  BaseVec StBases;
2567  // This map is used to track the relationship between the virtual
2568  // register that is the result of a load that is moved and the DBG_VALUE
2569  // MachineInstr pointer that uses that virtual register.
2570  SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2571
2572  unsigned Loc = 0;
  // Process one region at a time; calls and terminators act as barriers that
  // end a region (the inner loop breaks past them).
2575  while (MBBI != E) {
2576    for (; MBBI != E; ++MBBI) {
2577      MachineInstr &MI = *MBBI;
2578      if (MI.isCall() || MI.isTerminator()) {
2579        // Stop at barriers.
2580        ++MBBI;
2581        break;
2582      }
2583
2584      if (!MI.isDebugInstr())
2585        MI2LocMap[&MI] = ++Loc;
2586
2587      if (!isMemoryOp(MI))
2588        continue;
2589      Register PredReg;
2590      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2591        continue;
2592
2593      int Opc = MI.getOpcode();
2594      bool isLd = isLoadSingle(Opc);
2595      Register Base = MI.getOperand(1).getReg();
2597      bool StopHere = false;
      // Append MI to the list of ops sharing this base, unless an op with
      // the same base+offset was already recorded (sets StopHere).
2598      auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2599        auto [BI, Inserted] = Base2Ops.try_emplace(Base);
2600        if (Inserted) {
2601          BI->second.push_back(&MI);
2602          Bases.push_back(Base);
2603          return;
2604        }
2605        for (const MachineInstr *MI : BI->second) {
2606          if (Offset == getMemoryOpOffset(*MI)) {
2607            StopHere = true;
2608            break;
2609          }
2610        }
2611        if (!StopHere)
2612          BI->second.push_back(&MI);
2613      };
2614
2615      if (isLd)
2616        FindBases(Base2LdsMap, LdBases);
2617      else
2618        FindBases(Base2StsMap, StBases);
2619
2620      if (StopHere) {
2621        // Found a duplicate (a base+offset combination that's seen earlier).
2622        // Backtrack.
2623        --Loc;
2624        break;
2625      }
2626    }
2627
2628    // Re-schedule loads.
2629    for (unsigned Base : LdBases) {
2630      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2631      if (Lds.size() > 1)
2632        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
2633    }
2634
2635    // Re-schedule stores.
2636    for (unsigned Base : StBases) {
2637      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2638      if (Sts.size() > 1)
2639        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
2640    }
2641
2642    if (MBBI != E) {
2643      Base2LdsMap.clear();
2644      Base2StsMap.clear();
2645      LdBases.clear();
2646      StBases.clear();
2647    }
2648  }
2649
2650  // Reschedule DBG_VALUEs to match any loads that were moved. When a load is
2651  // sunk beyond a DBG_VALUE that is referring to it, the DBG_VALUE becomes a
2652  // use-before-def, resulting in a loss of debug info.
2653
2654  // Example:
2655  // Before the Pre Register Allocation Load Store Pass
2656  // inst_a
2657  // %2 = ld ...
2658  // inst_b
2659  // DBG_VALUE %2, "x", ...
2660  // %3 = ld ...
2661
2662  // After the Pass:
2663  // inst_a
2664  // inst_b
2665  // DBG_VALUE %2, "x", ...
2666  // %2 = ld ...
2667  // %3 = ld ...
2668
2669  // The code below addresses this by moving the DBG_VALUE to the position
2670  // immediately after the load.
2671
2672  // Example:
2673  // After the code below:
2674  // inst_a
2675  // inst_b
2676  // %2 = ld ...
2677  // DBG_VALUE %2, "x", ...
2678  // %3 = ld ...
2679
2680  // The algorithm works in two phases: First RescheduleOps() populates the
2681  // RegisterMap with registers that were moved as keys, there is no value
2682  // inserted. In the next phase, every MachineInstr in a basic block is
2683  // iterated over. If it is a valid DBG_VALUE or DBG_VALUE_LIST and it uses one
2684  // or more registers in the RegisterMap, the RegisterMap and InstrMap are
2685  // populated with the MachineInstr. If the DBG_VALUE or DBG_VALUE_LIST
2686  // describes debug information for a variable that already exists in the
2687  // DbgValueSinkCandidates, the MachineInstr in the DbgValueSinkCandidates must
2688  // be set to undef. If the current MachineInstr is a load that was moved,
2689  // undef the corresponding DBG_VALUE or DBG_VALUE_LIST and clone it to below
2690  // the load.
2691
2692  // To illustrate the above algorithm visually let's take this example.
2693
2694  // Before the Pre Register Allocation Load Store Pass:
2695  // %2 = ld ...
2696  // DBG_VALUE %2, A, .... # X
2697  // DBG_VALUE 0, A, ... # Y
2698  // %3 = ld ...
2699  // DBG_VALUE %3, A, ..., # Z
2700  // %4 = ld ...
2701
2702  // After Pre Register Allocation Load Store Pass:
2703  // DBG_VALUE %2, A, .... # X
2704  // DBG_VALUE 0, A, ... # Y
2705  // DBG_VALUE %3, A, ..., # Z
2706  // %2 = ld ...
2707  // %3 = ld ...
2708  // %4 = ld ...
2709
2710  // The algorithm below does the following:
2711
2712  // In the beginning, the RegisterMap will have been populated with the virtual
2713  // registers %2, and %3, the DbgValueSinkCandidates and the InstrMap will be
2714  // empty. DbgValueSinkCandidates = {}, RegisterMap = {2 -> {}, 3 -> {}},
2715  // InstrMap {}
2716  // -> DBG_VALUE %2, A, .... # X
2717  // DBG_VALUE 0, A, ... # Y
2718  // DBG_VALUE %3, A, ..., # Z
2719  // %2 = ld ...
2720  // %3 = ld ...
2721  // %4 = ld ...
2722
2723  // After the first DBG_VALUE (denoted with an X) is processed, the
2724  // DbgValueSinkCandidates and InstrMap will be populated and the RegisterMap
2725  // entry for %2 will be populated as well. DbgValueSinkCandidates = {A -> X},
2726  // RegisterMap = {2 -> {X}, 3 -> {}}, InstrMap {X -> 2}
2727  // DBG_VALUE %2, A, .... # X
2728  // -> DBG_VALUE 0, A, ... # Y
2729  // DBG_VALUE %3, A, ..., # Z
2730  // %2 = ld ...
2731  // %3 = ld ...
2732  // %4 = ld ...
2733
2734  // After the DBG_VALUE Y is processed, the DbgValueSinkCandidates is updated
2735  // to now hold Y for A and the RegisterMap is also updated to remove X from
2736  // %2, this is because both X and Y describe the same debug variable A. X is
2737  // also updated to have a $noreg as the first operand.
2738  // DbgValueSinkCandidates = {A -> {Y}}, RegisterMap = {2 -> {}, 3 -> {}},
2739  // InstrMap = {X-> 2}
2740  // DBG_VALUE $noreg, A, .... # X
2741  // DBG_VALUE 0, A, ... # Y
2742  // -> DBG_VALUE %3, A, ..., # Z
2743  // %2 = ld ...
2744  // %3 = ld ...
2745  // %4 = ld ...
2746
2747  // After DBG_VALUE Z is processed, the DbgValueSinkCandidates is updated to
2748  // hold Z fr A, the RegisterMap is updated to hold Z for %3, and the InstrMap
2749  // is updated to have Z mapped to %3. This is again because Z describes the
2750  // debug variable A, Y is not updated to have $noreg as first operand because
2751  // its first operand is an immediate, not a register.
2752  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2753  // InstrMap = {X -> 2, Z -> 3}
2754  // DBG_VALUE $noreg, A, .... # X
2755  // DBG_VALUE 0, A, ... # Y
2756  // DBG_VALUE %3, A, ..., # Z
2757  // -> %2 = ld ...
2758  // %3 = ld ...
2759  // %4 = ld ...
2760
2761  // Nothing happens here since the RegisterMap for %2 contains no value.
2762  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2763  // InstrMap = {X -> 2, Z -> 3}
2764  // DBG_VALUE $noreg, A, .... # X
2765  // DBG_VALUE 0, A, ... # Y
2766  // DBG_VALUE %3, A, ..., # Z
2767  // %2 = ld ...
2768  // -> %3 = ld ...
2769  // %4 = ld ...
2770
2771  // Since the RegisterMap contains Z as a value for %3, the MachineInstr
2772  // pointer Z is copied to come after the load for %3 and the old Z's first
2773  // operand is changed to $noreg the Basic Block iterator is moved to after the
2774  // DBG_VALUE Z's new position.
2775  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2776  // InstrMap = {X -> 2, Z -> 3}
2777  // DBG_VALUE $noreg, A, .... # X
2778  // DBG_VALUE 0, A, ... # Y
2779  // DBG_VALUE $noreg, A, ..., # Old Z
2780  // %2 = ld ...
2781  // %3 = ld ...
2782  // DBG_VALUE %3, A, ..., # Z
2783  // -> %4 = ld ...
2784
2785  // Nothing happens for %4 and the algorithm exits having processed the entire
2786  // Basic Block.
2787  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2788  // InstrMap = {X -> 2, Z -> 3}
2789  // DBG_VALUE $noreg, A, .... # X
2790  // DBG_VALUE 0, A, ... # Y
2791  // DBG_VALUE $noreg, A, ..., # Old Z
2792  // %2 = ld ...
2793  // %3 = ld ...
2794  // DBG_VALUE %3, A, ..., # Z
2795  // %4 = ld ...
2796
2797  // This map is used to track the relationship between
2798  // a Debug Variable and the DBG_VALUE MachineInstr pointer that describes the
2799  // debug information for that Debug Variable.
2800  SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2801  // This map is used to track the relationship between a DBG_VALUE or
2802  // DBG_VALUE_LIST MachineInstr pointer and Registers that it uses.
2803  SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2804  for (MBBI = MBB->begin(), E = MBB->end(); MBBI != E; ++MBBI) {
2805    MachineInstr &MI = *MBBI;
2806
2807    auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
2808      auto RegIt = RegisterMap.find(Reg);
2809      if (RegIt == RegisterMap.end())
2810        return;
2811      auto &InstrVec = RegIt->getSecond();
2812      InstrVec.push_back(&MI);
2813      InstrMap[&MI].push_back(Reg);
2814    };
2815
2816    if (MI.isDebugValue()) {
2817      assert(MI.getDebugVariable() &&
2818             "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2819
2821      // If the first operand is a register and it exists in the RegisterMap, we
2822      // know this is a DBG_VALUE that uses the result of a load that was moved,
2823      // and is therefore a candidate to also be moved, add it to the
2824      // RegisterMap and InstrMap.
2825      forEachDbgRegOperand(&MI, [&](MachineOperand &Op) {
2826        PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
2827      });
2828
2829      // If the current DBG_VALUE describes the same variable as one of the
2830      // in-flight DBG_VALUEs, remove the candidate from the list and set it to
2831      // undef. Moving one DBG_VALUE past another would result in the variable's
2832      // value going back in time when stepping through the block in the
2833      // debugger.
2834      auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
2835      if (InstrIt != DbgValueSinkCandidates.end()) {
2836        auto *Instr = InstrIt->getSecond();
2837        auto RegIt = InstrMap.find(Instr);
2838        if (RegIt != InstrMap.end()) {
2839          const auto &RegVec = RegIt->getSecond();
2840          // For every Register in the RegVec, remove the MachineInstr in the
2841          // RegisterMap that describes the DbgVar.
2842          for (auto &Reg : RegVec) {
2843            auto RegIt = RegisterMap.find(Reg);
2844            if (RegIt == RegisterMap.end())
2845              continue;
2846            auto &InstrVec = RegIt->getSecond();
2847            auto IsDbgVar = [&](MachineInstr *I) -> bool {
2849              return Var == DbgVar;
2850            };
2851
2852            llvm::erase_if(InstrVec, IsDbgVar);
2853          }
2855              [&](MachineOperand &Op) { Op.setReg(0); });
2856        }
2857      }
2858      DbgValueSinkCandidates[DbgVar] = &MI;
2859    } else {
2860      // If the first operand of a load matches with a DBG_VALUE in RegisterMap,
2861      // then move that DBG_VALUE to below the load.
2862      auto Opc = MI.getOpcode();
2863      if (!isLoadSingle(Opc))
2864        continue;
2865      auto Reg = MI.getOperand(0).getReg();
2866      auto RegIt = RegisterMap.find(Reg);
2867      if (RegIt == RegisterMap.end())
2868        continue;
2869      auto &DbgInstrVec = RegIt->getSecond();
2870      if (!DbgInstrVec.size())
2871        continue;
2872      for (auto *DbgInstr : DbgInstrVec) {
        // Clone the DBG_VALUE to just after the load, then undef the
        // original (rather than deleting it) to end the old range.
2873        MachineBasicBlock::iterator InsertPos = std::next(MBBI);
2874        auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
2875        MBB->insert(InsertPos, ClonedMI);
2876        MBBI++;
2877        // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
2878        // that was moved.
2879        auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
2880        // Erase DbgVar from DbgValueSinkCandidates if still present. If the
2881        // instruction is a DBG_VALUE_LIST, it may have already been erased from
2882        // DbgValueSinkCandidates.
2883        DbgValueSinkCandidates.erase(DbgVar);
2884        // Zero out original dbg instr
2885        forEachDbgRegOperand(DbgInstr,
2886                             [&](MachineOperand &Op) { Op.setReg(0); });
2887        // Update RegisterMap with ClonedMI because it might have to be moved
2888        // again.
2889        if (DbgInstr->isDebugValueList())
2890          updateRegisterMapForDbgValueListAfterMove(RegisterMap, ClonedMI,
2891                                                    DbgInstr);
2892      }
2893    }
2894  }
2895  return RetVal;
2896}
2897
2898// Get the Base register operand index from the memory access MachineInst if we
2899// should attempt to distribute postinc on it. Return -1 if not of a valid
2900// instruction type. If it returns an index, it is assumed that instruction is a
2901// r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
2903 switch (MI.getOpcode()) {
2904 case ARM::MVE_VLDRBS16:
2905 case ARM::MVE_VLDRBS32:
2906 case ARM::MVE_VLDRBU16:
2907 case ARM::MVE_VLDRBU32:
2908 case ARM::MVE_VLDRHS32:
2909 case ARM::MVE_VLDRHU32:
2910 case ARM::MVE_VLDRBU8:
2911 case ARM::MVE_VLDRHU16:
2912 case ARM::MVE_VLDRWU32:
2913 case ARM::MVE_VSTRB16:
2914 case ARM::MVE_VSTRB32:
2915 case ARM::MVE_VSTRH32:
2916 case ARM::MVE_VSTRBU8:
2917 case ARM::MVE_VSTRHU16:
2918 case ARM::MVE_VSTRWU32:
2919 case ARM::t2LDRHi8:
2920 case ARM::t2LDRHi12:
2921 case ARM::t2LDRSHi8:
2922 case ARM::t2LDRSHi12:
2923 case ARM::t2LDRBi8:
2924 case ARM::t2LDRBi12:
2925 case ARM::t2LDRSBi8:
2926 case ARM::t2LDRSBi12:
2927 case ARM::t2STRBi8:
2928 case ARM::t2STRBi12:
2929 case ARM::t2STRHi8:
2930 case ARM::t2STRHi12:
2931 return 1;
2932 case ARM::MVE_VLDRBS16_post:
2933 case ARM::MVE_VLDRBS32_post:
2934 case ARM::MVE_VLDRBU16_post:
2935 case ARM::MVE_VLDRBU32_post:
2936 case ARM::MVE_VLDRHS32_post:
2937 case ARM::MVE_VLDRHU32_post:
2938 case ARM::MVE_VLDRBU8_post:
2939 case ARM::MVE_VLDRHU16_post:
2940 case ARM::MVE_VLDRWU32_post:
2941 case ARM::MVE_VSTRB16_post:
2942 case ARM::MVE_VSTRB32_post:
2943 case ARM::MVE_VSTRH32_post:
2944 case ARM::MVE_VSTRBU8_post:
2945 case ARM::MVE_VSTRHU16_post:
2946 case ARM::MVE_VSTRWU32_post:
2947 case ARM::MVE_VLDRBS16_pre:
2948 case ARM::MVE_VLDRBS32_pre:
2949 case ARM::MVE_VLDRBU16_pre:
2950 case ARM::MVE_VLDRBU32_pre:
2951 case ARM::MVE_VLDRHS32_pre:
2952 case ARM::MVE_VLDRHU32_pre:
2953 case ARM::MVE_VLDRBU8_pre:
2954 case ARM::MVE_VLDRHU16_pre:
2955 case ARM::MVE_VLDRWU32_pre:
2956 case ARM::MVE_VSTRB16_pre:
2957 case ARM::MVE_VSTRB32_pre:
2958 case ARM::MVE_VSTRH32_pre:
2959 case ARM::MVE_VSTRBU8_pre:
2960 case ARM::MVE_VSTRHU16_pre:
2961 case ARM::MVE_VSTRWU32_pre:
2962 return 2;
2963 }
2964 return -1;
2965}
2966
2968 switch (MI.getOpcode()) {
2969 case ARM::MVE_VLDRBS16_post:
2970 case ARM::MVE_VLDRBS32_post:
2971 case ARM::MVE_VLDRBU16_post:
2972 case ARM::MVE_VLDRBU32_post:
2973 case ARM::MVE_VLDRHS32_post:
2974 case ARM::MVE_VLDRHU32_post:
2975 case ARM::MVE_VLDRBU8_post:
2976 case ARM::MVE_VLDRHU16_post:
2977 case ARM::MVE_VLDRWU32_post:
2978 case ARM::MVE_VSTRB16_post:
2979 case ARM::MVE_VSTRB32_post:
2980 case ARM::MVE_VSTRH32_post:
2981 case ARM::MVE_VSTRBU8_post:
2982 case ARM::MVE_VSTRHU16_post:
2983 case ARM::MVE_VSTRWU32_post:
2984 return true;
2985 }
2986 return false;
2987}
2988
2990 switch (MI.getOpcode()) {
2991 case ARM::MVE_VLDRBS16_pre:
2992 case ARM::MVE_VLDRBS32_pre:
2993 case ARM::MVE_VLDRBU16_pre:
2994 case ARM::MVE_VLDRBU32_pre:
2995 case ARM::MVE_VLDRHS32_pre:
2996 case ARM::MVE_VLDRHU32_pre:
2997 case ARM::MVE_VLDRBU8_pre:
2998 case ARM::MVE_VLDRHU16_pre:
2999 case ARM::MVE_VLDRWU32_pre:
3000 case ARM::MVE_VSTRB16_pre:
3001 case ARM::MVE_VSTRB32_pre:
3002 case ARM::MVE_VSTRH32_pre:
3003 case ARM::MVE_VSTRBU8_pre:
3004 case ARM::MVE_VSTRHU16_pre:
3005 case ARM::MVE_VSTRWU32_pre:
3006 return true;
3007 }
3008 return false;
3009}
3010
3011// Given a memory access Opcode, check that the give Imm would be a valid Offset
3012// for this instruction (same as isLegalAddressImm), Or if the instruction
3013// could be easily converted to one where that was valid. For example converting
3014// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
3015// AdjustBaseAndOffset below.
3016static bool isLegalOrConvertibleAddressImm(unsigned Opcode, int Imm,
3017 const TargetInstrInfo *TII,
3018 int &CodesizeEstimate) {
3019 if (isLegalAddressImm(Opcode, Imm, TII))
3020 return true;
3021
3022 // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
3023 const MCInstrDesc &Desc = TII->get(Opcode);
3024 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
3025 switch (AddrMode) {
3027 CodesizeEstimate += 1;
3028 return Imm < 0 && -Imm < ((1 << 8) * 1);
3029 }
3030 return false;
3031}
3032
3033// Given an MI adjust its address BaseReg to use NewBaseReg and address offset
3034// by -Offset. This can either happen in-place or be a replacement as MI is
3035// converted to another instruction type.
3037 int Offset, const TargetInstrInfo *TII,
3038 const TargetRegisterInfo *TRI) {
3039 // Set the Base reg
3040 unsigned BaseOp = getBaseOperandIndex(*MI);
3041 MI->getOperand(BaseOp).setReg(NewBaseReg);
3042 // and constrain the reg class to that required by the instruction.
3043 MachineFunction *MF = MI->getMF();
3044 MachineRegisterInfo &MRI = MF->getRegInfo();
3045 const MCInstrDesc &MCID = TII->get(MI->getOpcode());
3046 const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp);
3047 MRI.constrainRegClass(NewBaseReg, TRC);
3048
3049 int OldOffset = MI->getOperand(BaseOp + 1).getImm();
3050 if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
3051 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
3052 else {
3053 unsigned ConvOpcode;
3054 switch (MI->getOpcode()) {
3055 case ARM::t2LDRHi12:
3056 ConvOpcode = ARM::t2LDRHi8;
3057 break;
3058 case ARM::t2LDRSHi12:
3059 ConvOpcode = ARM::t2LDRSHi8;
3060 break;
3061 case ARM::t2LDRBi12:
3062 ConvOpcode = ARM::t2LDRBi8;
3063 break;
3064 case ARM::t2LDRSBi12:
3065 ConvOpcode = ARM::t2LDRSBi8;
3066 break;
3067 case ARM::t2STRHi12:
3068 ConvOpcode = ARM::t2STRHi8;
3069 break;
3070 case ARM::t2STRBi12:
3071 ConvOpcode = ARM::t2STRBi8;
3072 break;
3073 default:
3074 llvm_unreachable("Unhandled convertible opcode");
3075 }
3076 assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
3077 "Illegal Address Immediate after convert!");
3078
3079 const MCInstrDesc &MCID = TII->get(ConvOpcode);
3080 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3081 .add(MI->getOperand(0))
3082 .add(MI->getOperand(1))
3083 .addImm(OldOffset - Offset)
3084 .add(MI->getOperand(3))
3085 .add(MI->getOperand(4))
3086 .cloneMemRefs(*MI);
3087 MI->eraseFromParent();
3088 }
3089}
3090
3092 Register NewReg,
3093 const TargetInstrInfo *TII,
3094 const TargetRegisterInfo *TRI) {
3095 MachineFunction *MF = MI->getMF();
3096 MachineRegisterInfo &MRI = MF->getRegInfo();
3097
3098 unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
3099 MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
3100
3101 const MCInstrDesc &MCID = TII->get(NewOpcode);
3102 // Constrain the def register class
3103 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
3104 MRI.constrainRegClass(NewReg, TRC);
3105 // And do the same for the base operand
3106 TRC = TII->getRegClass(MCID, 2);
3107 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
3108
3109 unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
3110 switch (AddrMode) {
3114 // Any MVE load/store
3115 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3116 .addReg(NewReg, RegState::Define)
3117 .add(MI->getOperand(0))
3118 .add(MI->getOperand(1))
3119 .addImm(Offset)
3120 .add(MI->getOperand(3))
3121 .add(MI->getOperand(4))
3122 .add(MI->getOperand(5))
3123 .cloneMemRefs(*MI);
3125 if (MI->mayLoad()) {
3126 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3127 .add(MI->getOperand(0))
3128 .addReg(NewReg, RegState::Define)
3129 .add(MI->getOperand(1))
3130 .addImm(Offset)
3131 .add(MI->getOperand(3))
3132 .add(MI->getOperand(4))
3133 .cloneMemRefs(*MI);
3134 } else {
3135 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3136 .addReg(NewReg, RegState::Define)
3137 .add(MI->getOperand(0))
3138 .add(MI->getOperand(1))
3139 .addImm(Offset)
3140 .add(MI->getOperand(3))
3141 .add(MI->getOperand(4))
3142 .cloneMemRefs(*MI);
3143 }
3144 default:
3145 llvm_unreachable("Unhandled createPostIncLoadStore");
3146 }
3147}
3148
3149// Given a Base Register, optimise the load/store uses to attempt to create more
3150// post-inc accesses and less register moves. We do this by taking zero offset
3151// loads/stores with an add, and convert them to a postinc load/store of the
3152// same type. Any subsequent accesses will be adjusted to use and account for
3153// the post-inc value.
3154// For example:
3155// LDR #0 LDR_POSTINC #16
3156// LDR #4 LDR #-12
3157// LDR #8 LDR #-8
3158// LDR #12 LDR #-4
3159// ADD #16
3160//
3161// At the same time if we do not find an increment but do find an existing
3162// pre/post inc instruction, we can still adjust the offsets of subsequent
3163// instructions to save the register move that would otherwise be needed for the
3164// in-place increment.
3165bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
3166 // We are looking for:
3167 // One zero offset load/store that can become postinc
3168 MachineInstr *BaseAccess = nullptr;
3169 MachineInstr *PrePostInc = nullptr;
3170 // An increment that can be folded in
3171 MachineInstr *Increment = nullptr;
3172 // Other accesses after BaseAccess that will need to be updated to use the
3173 // postinc value.
3174 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3175 for (auto &Use : MRI->use_nodbg_instructions(Base)) {
3176 if (!Increment && getAddSubImmediate(Use) != 0) {
3177 Increment = &Use;
3178 continue;
3179 }
3180
3181 int BaseOp = getBaseOperandIndex(Use);
3182 if (BaseOp == -1)
3183 return false;
3184
3185 if (!Use.getOperand(BaseOp).isReg() ||
3186 Use.getOperand(BaseOp).getReg() != Base)
3187 return false;
3188 if (isPreIndex(Use) || isPostIndex(Use))
3189 PrePostInc = &Use;
3190 else if (Use.getOperand(BaseOp + 1).getImm() == 0)
3191 BaseAccess = &Use;
3192 else
3193 OtherAccesses.insert(&Use);
3194 }
3195
3196 int IncrementOffset;
3197 Register NewBaseReg;
3198 if (BaseAccess && Increment) {
3199 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
3200 return false;
3201 Register PredReg;
3202 if (Increment->definesRegister(ARM::CPSR, /*TRI=*/nullptr) ||
3204 return false;
3205
3206 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
3207 << Base.virtRegIndex() << "\n");
3208
3209 // Make sure that Increment has no uses before BaseAccess that are not PHI
3210 // uses.
3211 for (MachineInstr &Use :
3212 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
3213 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
3214 !DT->dominates(BaseAccess, &Use))) {
3215 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
3216 return false;
3217 }
3218 }
3219
3220 // Make sure that Increment can be folded into Base
3221 IncrementOffset = getAddSubImmediate(*Increment);
3222 unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
3223 BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
3224 if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
3225 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
3226 return false;
3227 }
3228 }
3229 else if (PrePostInc) {
3230 // If we already have a pre/post index load/store then set BaseAccess,
3231 // IncrementOffset and NewBaseReg to the values it already produces,
3232 // allowing us to update and subsequent uses of BaseOp reg with the
3233 // incremented value.
3234 if (Increment)
3235 return false;
3236
3237 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
3238 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
3239 int BaseOp = getBaseOperandIndex(*PrePostInc);
3240 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
3241 BaseAccess = PrePostInc;
3242 NewBaseReg = PrePostInc->getOperand(0).getReg();
3243 }
3244 else
3245 return false;
3246
3247 // And make sure that the negative value of increment can be added to all
3248 // other offsets after the BaseAccess. We rely on either
3249 // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
3250 // to keep things simple.
3251 // This also adds a simple codesize metric, to detect if an instruction (like
3252 // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
3253 // cannot because it is converted to something else (t2LDRBi8). We start this
3254 // at -1 for the gain from removing the increment.
3255 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3256 int CodesizeEstimate = -1;
3257 for (auto *Use : OtherAccesses) {
3258 if (DT->dominates(BaseAccess, Use)) {
3259 SuccessorAccesses.insert(Use);
3260 unsigned BaseOp = getBaseOperandIndex(*Use);
3261 if (!isLegalOrConvertibleAddressImm(Use->getOpcode(),
3262 Use->getOperand(BaseOp + 1).getImm() -
3263 IncrementOffset,
3264 TII, CodesizeEstimate)) {
3265 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
3266 return false;
3267 }
3268 } else if (!DT->dominates(Use, BaseAccess)) {
3269 LLVM_DEBUG(
3270 dbgs() << " Unknown dominance relation between Base and Use\n");
3271 return false;
3272 }
3273 }
3274 if (STI->hasMinSize() && CodesizeEstimate > 0) {
3275 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
3276 return false;
3277 }
3278
3279 if (!PrePostInc) {
3280 // Replace BaseAccess with a post inc
3281 LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
3282 LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
3283 NewBaseReg = Increment->getOperand(0).getReg();
3284 MachineInstr *BaseAccessPost =
3285 createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
3286 BaseAccess->eraseFromParent();
3287 Increment->eraseFromParent();
3288 (void)BaseAccessPost;
3289 LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
3290 }
3291
3292 for (auto *Use : SuccessorAccesses) {
3293 LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
3294 AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
3295 LLVM_DEBUG(dbgs() << " To : "; Use->dump());
3296 }
3297
3298 // Remove the kill flag from all uses of NewBaseReg, in case any old uses
3299 // remain.
3300 for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
3301 Op.setIsKill(false);
3302 return true;
3303}
3304
3305bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3306 bool Changed = false;
3307 SmallSetVector<Register, 4> Visited;
3308 for (auto &MBB : *MF) {
3309 for (auto &MI : MBB) {
3310 int BaseOp = getBaseOperandIndex(MI);
3311 if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
3312 continue;
3313
3314 Register Base = MI.getOperand(BaseOp).getReg();
3315 if (!Base.isVirtual())
3316 continue;
3317
3318 Visited.insert(Base);
3319 }
3320 }
3321
3322 for (auto Base : Visited)
3323 Changed |= DistributeIncrements(Base);
3324
3325 return Changed;
3326}
3327
3328/// Returns an instance of the load / store optimization pass.
3330 if (PreAlloc)
3331 return new ARMPreAllocLoadStoreOptLegacy();
3332 return new ARMLoadStoreOptLegacy();
3333}
3334
3338 ARMLoadStoreOpt Impl;
3339 bool Changed = Impl.runOnMachineFunction(MF);
3340 if (!Changed)
3341 return PreservedAnalyses::all();
3344 return PA;
3345}
3346
3350 ARMPreAllocLoadStoreOpt Impl;
3351 AliasAnalysis *AA =
3353 .getManager()
3354 .getResult<AAManager>(MF.getFunction());
3356 bool Changed = Impl.runOnMachineFunction(MF, AA, DT);
3357 if (!Changed)
3358 return PreservedAnalyses::all();
3361 return PA;
3362}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static bool isLoadSingle(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool ContainsReg(ArrayRef< std::pair< unsigned, bool > > Regs, unsigned Reg)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static bool isLegalOrConvertibleAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for a increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
A set of register units.
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
Basic Register Allocator
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb2() const
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
bool hasMinSize() const
bool isCortexM3() const
Align getDualLoadStoreAlignment() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:328
iterator end()
Definition DenseMap.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
Describe properties that are true of each instruction in the target description file.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition Allocator.h:390
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
LLVM Value Representation.
Definition Value.h:75
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ARM
Windows AXP64.
Definition MCAsmInfo.h:49
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:50
This namespace contains all of the command line option processing machinery.
Definition CommandLine.h:52
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
BBIterator iterator
Definition BasicBlock.h:87
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:557
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition APFloat.h:1660
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr RegState getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
unsigned M1(unsigned Val)
Definition VE.h:377
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr RegState getDefRegState(bool B)
FunctionPass * createARMLoadStoreOptLegacyPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1909
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
int getAddSubImmediate(MachineInstr &MI)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr RegState getUndefRegState(bool B)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39