#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck,
          "Number of load/store pair transformations that did not pass "
          "the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of const offset of index address folded");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
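// Illustrative note (not from this file): the core rewrite performed by this
// pass turns two adjacent accesses into one paired access, e.g.
//   ldr w0, [x2]
//   ldr w1, [x2, #4]
// becomes
//   ldp w0, w1, [x2]
// It also folds base-register updates into pre-/post-indexed forms and
// promotes narrow zero stores and loads-from-stores, as the statistics above
// suggest.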
namespace {

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  // If not std::nullopt, RenameReg can be used to rename the result register
  // of the load in case it needs to be renamed to avoid clobbering a live
  // value.
  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
  // ... (pass boilerplate and data members elided) ...

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Merge the two instructions indicated into a single wide zero store.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Test if the given instruction is matched to the address of the given
  // instruction plus an optional offset.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // Test if the given instruction is a MOVK that sets up the constant offset
  // of an index address.
  bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
                              unsigned IndexReg, unsigned &Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  std::optional<MachineBasicBlock::iterator>
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsForward,
                  bool IsPreIdx, bool MergeEither);

  // ... (remaining declarations elided) ...
};

} // end anonymous namespace
char AArch64LoadStoreOpt::ID = 0;
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
// These instructions set a memory tag and either keep memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return true;
  }
}
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::STR_ZXI:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDR_ZXI:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  }
}
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
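// Illustrative example (not from this file) of how this widening table is
// used when merging narrow zero stores: two adjacent halfword stores
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
// are replaced by a single "str wzr, [x0]", i.e. STRHHui maps to STRWui.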
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STR_ZXI:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDR_ZXI:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
  }
}
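// Illustrative example (not from this file): LDRXui maps to LDPXi, so
//   ldr x0, [x3, #8]
//   ldr x1, [x3, #16]
// can be rewritten as
//   ldp x0, x1, [x3, #8]
// Note the unscaled (LDUR*/STUR*) forms map to the same scaled pair opcodes,
// which is why pairing them requires the unscaled offset to be a multiple of
// the access size.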
static bool isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
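// Illustrative example (not from this file): a narrower load matches any
// same-form store that is at least as wide, e.g. LDRHHui matches STRWui, so
//   str w0, [x2]
//   ldrh w1, [x2]
// is a candidate for the store-to-load promotion performed in
// promoteLoadFromStore below.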
static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such
  // an optimization in the future the cost should be checked.
  assert(!AArch64InstrInfo::hasUnscaledLdStOffset(Opc) &&
         "Opcode has unscaled variant!");
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
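// Illustrative example (not from this file) of the pre-index rewrite enabled
// by this table:
//   add x0, x0, #16
//   ldr x1, [x0]
// becomes the writeback form
//   ldr x1, [x0, #16]!
// i.e. LDRXui is replaced by LDRXpre and the ADDXri is deleted.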
// Returns the scaled, unsigned-immediate opcode corresponding to a
// register-offset (roX) load, used when the register index can be folded
// into the base address.
static unsigned getBaseAddressOpcode(unsigned Opc) {
  // TODO: Add more index address loads/stores.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no base address equivalent!");
  case AArch64::LDRBroX:
    return AArch64::LDRBui;
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  case AArch64::LDRSBXroX:
    return AArch64::LDRSBXui;
  case AArch64::LDRSBWroX:
    return AArch64::LDRSBWui;
  case AArch64::LDRHroX:
    return AArch64::LDRHui;
  case AArch64::LDRHHroX:
    return AArch64::LDRHHui;
  case AArch64::LDRSHXroX:
    return AArch64::LDRSHXui;
  case AArch64::LDRSHWroX:
    return AArch64::LDRSHWui;
  case AArch64::LDRWroX:
    return AArch64::LDRWui;
  case AArch64::LDRSroX:
    return AArch64::LDRSui;
  case AArch64::LDRSWroX:
    return AArch64::LDRSWui;
  case AArch64::LDRDroX:
    return AArch64::LDRDui;
  case AArch64::LDRXroX:
    return AArch64::LDRXui;
  case AArch64::LDRQroX:
    return AArch64::LDRQui;
  }
}
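// Illustrative example (not from this file): when the register offset of an
// roX-form load is a large constant built with MOVZ/MOVK, e.g.
//   mov  w8, #0x8
//   movk w8, #0x5, lsl #16      ; w8 = 0x50008
//   ldr  x1, [x0, x8]
// the pass can rewrite it as
//   add x8, x0, #0x50, lsl #12  ; x8 = x0 + 0x50000
//   ldr x1, [x8, #8]
// using the scaled immediate opcode returned here (LDRXroX -> LDRXui).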
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
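// Illustrative example (not from this file) of the post-index rewrite
// enabled by this table:
//   ldr x1, [x0]
//   add x0, x0, #8
// becomes the writeback form
//   ldr x1, [x0], #8
// i.e. LDRXui is replaced by LDRXpost and the ADDXri is deleted.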
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  case AArch64::LDRSWpre:
    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
  }
}
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  // ... (body elided) ...
}
static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp + 1 : 0;
  return MI.getOperand(Idx);
}
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
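// Worked example (not from this file): for "str x0, [x8, #8]"
// (StoreSize = 8, UnscaledStOffset = 8) and "ldrh w1, [x8, #12]"
// (LoadSize = 2, UnscaledLdOffset = 12), the check is
// 8 <= 12 && 12 + 2 <= 8 + 8, so the loaded halfword lies entirely within
// the stored doubleword and can be extracted from x0 instead of reloaded.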
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}
static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
      return false;
    // ... (further checks elided) ...
    return true;
  }
}
// Make sure this is a reg+reg (roX) load with a foldable index.
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  // TODO: Add more index address stores.
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:
    Scale = 1;
    return true;
  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:
    Scale = 2;
    return true;
  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:
    Scale = 4;
    return true;
  case AArch64::LDRDroX:
  case AArch64::LDRXroX:
    Scale = 8;
    return true;
  case AArch64::LDRQroX:
    Scale = 16;
    return true;
  }
}
static bool isRewritableImplicitDef(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::ORRWrs:
  case AArch64::ADDWri:
    return true;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward points to.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  int64_t IOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
  int64_t MIOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
      MergeMIOffsetStride;
  // Select final offset based on the offset order.
  int64_t OffsetImm;
  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  int NewOpcode = getMatchingWideOpcode(Opc);
  // Adjust the final offset if the result opcode is a scaled store.
  if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
    int NewOffsetStride = TII->getMemScale(NewOpcode);
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  // ... (debug printing elided) ...

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
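// Illustrative before/after (not from this file) for this transformation:
//   strh wzr, [x0, #4]
//   strh wzr, [x0, #6]
// is merged (taking the lower of the two byte offsets, rescaled for the
// wider opcode) into:
//   str wzr, [x0, #4]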
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getIterator(), MBB->instr_end())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  // First remove kills, then add defs, so a register that is both killed and
  // redefined in MI ends up live.
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}
/// This function will add a new entry into the debugValueSubstitutions table
/// when two instructions have been merged into a new one represented by
/// \p MergedInstr.
static void addDebugSubstitutionsToTable(MachineFunction *MF,
                                         unsigned InstrNumToSet,
                                         MachineInstr &OriginalInstr,
                                         MachineInstr &MergedInstr) {
  // Figure out the operand number of the tracked register within the merged
  // instruction.
  Register Reg = OriginalInstr.getOperand(0).getReg();
  unsigned OperandNo = 0;
  bool RegFound = false;
  for (const auto Op : MergedInstr.operands()) {
    if (Op.getReg() == Reg) {
      RegFound = true;
      break;
    }
    OperandNo++;
  }

  if (RegFound)
    MF->makeDebugValueSubstitution({OriginalInstr.getDebugInstrNum(), 0},
                                   {InstrNumToSet, OperandNo});
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // the original register.
    auto GetMatchingSubReg =
        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
      for (MCPhysReg SubOrSuper :
           TRI->sub_and_superregs_inclusive(*RenameReg)) {
        if (C->contains(SubOrSuper))
          return SubOrSuper;
      }
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
                                                             bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!MergeForward || !SeenDef ||
                   (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  // Preserve any register class constraints on the operand.
                  MatchingReg = GetMatchingSubReg(RC);
                else {
                  if (!isRewritableImplicitDef(MI.getOpcode()))
                    continue;
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                }
                MOP.setReg(MatchingReg);
                SeenDef = true;
              }
            }
          } else {
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                assert(MatchingReg != AArch64::NoRegister &&
                       "Cannot find matching regs for renaming");
                MOP.setReg(MatchingReg);
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI);
          return true;
        };
    forAllMIsUntilDef(MergeForward ? *I : *std::prev(Paired), RegToRename, TRI,
                      UINT32_MAX, UpdateMIs);

#if !defined(NDEBUG)
    // Make sure the register used for renaming is not used between the
    // paired instructions, which would trash its content.
    MCPhysReg RegToCheck = *RenameReg;
    // For forward merging, check the original register instead, as all its
    // uses in the range have been rewritten.
    if (MergeForward)
      RegToCheck = RegToRename;
    for (auto &MI :
         iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
             MergeForward ? std::next(I) : I,
             MergeForward ? std::next(Paired) : Paired))
      assert(all_of(MI.operands(),
                    [this, RegToCheck](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             MOP.isUndef() ||
                             !TRI->regsOverlap(MOP.getReg(), RegToCheck);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward points to.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled.
    // If FirstMI is scaled then scale the offset of MI accordingly.
    // Otherwise, do the opposite (i.e., make MI's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // However, for pre load/stores the Rt should be the one of the pre
  // load/store.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear the kill flag on the moved store's register if it is read
      // between the two instructions.
      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
        if (It->readsRegister(PairedRegOp.getReg(), TRI))
          PairedRegOp.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register in the range, as the
      // store moves below its former kill.
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI :
           make_range(std::next(I->getIterator()), Paired->getIterator()))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)));
  // Adds the pre-index operand for pre-indexed ld/st pairs.
  if (AArch64InstrInfo::isPreLdSt(*RtMI))
    MIB.addReg(BaseRegOp.getReg(), RegState::Define);
  MIB.add(RegOp0)
      .add(RegOp1)
      .add(BaseRegOp)
      .addImm(OffsetImm)
      .cloneMergedMemRefs({&*I, &*Paired})
      .setMIFlags(I->mergeFlagsWith(*Paired));

  LLVM_DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
  // ... (debug printing elided) ...

  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %w1 = KILL %w1, implicit-def %x1
    // %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // SEXTLOAD.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    // Make the machine verifier happy by providing a definition for
    // the X register, inserted right before the sign extension.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;

    // If either input carried a debug-instr-number, map it to the correct
    // replacement: the sign-extend if it defined the extended register,
    // otherwise the new pair.
    if (I->peekDebugInstrNum()) {
      unsigned NewInstrNum;
      if (DstRegX == I->getOperand(0).getReg()) {
        NewInstrNum = MIBSXTW->getDebugInstrNum();
        addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
                                     *MIBSXTW);
      } else {
        NewInstrNum = MIB->getDebugInstrNum();
        addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
      }
    }
    if (Paired->peekDebugInstrNum()) {
      unsigned NewInstrNum;
      if (DstRegX == Paired->getOperand(0).getReg()) {
        NewInstrNum = MIBSXTW->getDebugInstrNum();
        addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
                                     *MIBSXTW);
      } else {
        NewInstrNum = MIB->getDebugInstrNum();
        addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
                                     *MIB);
      }
    }
  } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
    // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
    // variant of the registers.
    MachineOperand &MOp0 = MIB->getOperand(0);
    MachineOperand &MOp1 = MIB->getOperand(1);
    assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
           AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
    MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
    MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
  } else {
    // If either input carried a debug-instr-number, map it to the merged
    // instruction.
    if (I->peekDebugInstrNum()) {
      unsigned NewInstrNum = MIB->getDebugInstrNum();
      addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
    }
    if (Paired->peekDebugInstrNum()) {
      unsigned NewInstrNum = MIB->getDebugInstrNum();
      addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
                                   *MIB);
    }
  }

  // Copy implicit-def operands of both instructions onto the merged
  // instruction.
  auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
                             MachineBasicBlock::iterator MI2) {
    SmallSetVector<Register, 4> Ops;
    for (const MachineOperand &MO : MI1->operands())
      if (MO.isReg() && MO.isImplicit() && MO.isDef())
        Ops.insert(MO.getReg());
    for (const MachineOperand &MO : MI2->operands())
      if (MO.isReg() && MO.isImplicit() && MO.isDef())
        Ops.insert(MO.getReg());
    for (Register Reg : Ops)
      MIB.addDef(Reg, RegState::Implicit);
  };
  CopyImplicitOps(I, Paired);

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
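// Illustrative example (not from this file) of the SExtIdx path above: when
// a sign-extending load is paired with a normal load, e.g.
//   ldrsw x0, [x2]
//   ldr   w1, [x2, #4]
// the pair is emitted on the W registers and the sign extension is redone
// explicitly:
//   ldp w0, w1, [x2]
//   ; KILL w0, implicit-def x0 (for the verifier)
//   sbfm x0, x0, #0, #31      ; i.e. sxtw x0, w0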
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if its destination register is the same as the stored
    // value's register.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if load and store have the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // The load is narrower than the store: extract the relevant bits with a
    // bitfield extract.
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0);              // imms

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else if (IsStoreXReg && Imms == 31) {
      // Use the 32 bit variant of UBFM if it's the LSR alias of the
      // instruction.
      assert(Immr <= Imms && "Expected LSR alias of UBFM");
      BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                         TII->get(AArch64::UBFMWri),
                         TRI->getSubReg(DestReg, AArch64::sub_32))
                     .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
                     .addImm(Immr)
                     .addImm(Imms)
                     .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  // Erase the old load.
  LoadI->eraseFromParent();
  return NextI;
}
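// Illustrative before/after (not from this file) for the sub-register case
// of this promotion:
//   str w0, [x2]
//   ldrh w1, [x2]        ; UnscaledLdOffset == UnscaledStOffset, Width = 16
// becomes
//   and w1, w0, #0xffff  ; ANDWri with Immr = 0, Imms = 15
// while an equal-size load is replaced by a plain register move
// (ORRWrs/ORRXrs from the zero register).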
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint_64t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
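// Worked example (not from this file): alignTo(13, 8) computes
// (13 + 7) & ~7 = 20 & ~7 = 16, i.e. 13 rounded up to the next multiple of
// 8; already-aligned values are unchanged (alignTo(16, 8) == 16).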
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // ... (store matching and call handling elided) ...

    // Update modified / uses register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and
  // volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: if the opcodes are pre ld/st there is nothing more to
  // check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
  // allow pairing them with other instructions.
  if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
      OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
    return false;

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // Narrow stores do not have matching pair opcodes, so constrain their
  // merging to zero stores of equal size.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
           getLdStRegOp(MI).getReg() == AArch64::WZR &&
           TII->getMemScale(FirstMI) == TII->getMemScale(MI);

  // The pre-indexed - unsigned-offset combinations listed in
  // isPreLdStPairCandidate can be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) !=
             TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
}
static bool canRenameMOP(const MachineOperand &MOP,
                         const TargetRegisterInfo *TRI) {
  if (MOP.isReg()) {
    auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
    // Renaming registers with multiple disjunct sub-registers (e.g. the
    // result of a LD3) means that all sub-registers are renamed, potentially
    // impacting other instructions we did not check. Bail out.
    // Note that this relies on the structure of the AArch64 register file. In
    // particular, a subregister cannot be written without overwriting the
    // whole register.
    if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
        (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
      LLVM_DEBUG(
          dbgs()
          << "  Cannot rename operands with multiple disjunct subregisters ("
          << MOP << ")\n");
      return false;
    }

    // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
    // them must be known. For example, in ORRWrs the implicit-def
    // corresponds to the result register.
    if (MOP.isImplicit() && MOP.isDef()) {
      if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
        return false;
      return TRI->isSuperOrSubRegisterEq(
          MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
    }
  }
  return MOP.isImplicit() ||
         (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
}
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                             SmallPtrSetImpl<const TargetRegisterClass *>
                                 &RequiredClasses,
                             const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  MCPhysReg RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI);
    return false;
  }

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for
  // RegToRename, we
  // * check if we can rename RegToRename in this instruction and
  // * collect the classes used to narrow down the set of candidates.
  MachineBasicBlock &MBB = *FirstMI.getParent();
  for (MachineInstr &MI :
       llvm::reverse(make_range(MBB.begin(), FirstMI.getIterator()))) {
    // Cannot rename inside pseudo instructions or instruction bundles.
    if (MI.isPseudo()) {
      LLVM_DEBUG(dbgs() << "  Cannot rename pseudo/bundle instruction\n");
      return false;
    }

    // ... (tracking of FoundDef elided) ...

    if (FoundDef) {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
  }
  LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
  return false;
}
static bool canRenameUntilSecondLoad(
    MachineInstr &FirstLoad, MachineInstr &SecondLoad,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  if (FirstLoad.isPseudo())
    return false;

  UsedInBetween.accumulate(FirstLoad);
  auto RegToRename = getLdStRegOp(FirstLoad).getReg();
  bool Success = std::all_of(
      FirstLoad.getIterator(), SecondLoad.getIterator(),
      [&](MachineInstr &MI) {
        LLVM_DEBUG(dbgs() << "Checking " << MI);
        // Currently we do not try to rename across frame-setup instructions.
        if (MI.getFlag(MachineInstr::FrameSetup)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions "
                            << "currently\n");
          return false;
        }

        for (auto &MOP : MI.operands()) {
          if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
              !TRI->regsOverlap(MOP.getReg(), RegToRename))
            continue;
          if (!canRenameMOP(MOP, TRI)) {
            LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
            return false;
          }
          RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
        }

        return true;
      });
  return Success;
}
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(
          TRI->sub_and_superregs_inclusive(PR),
          [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
}
// For store pairs: find a register that can replace the first store's source
// register up to its defining instruction. For load pairs: find a register
// that can replace the first load's destination up to the second load.
static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
    std::optional<bool> MaybeCanRename, MachineInstr &FirstMI,
    MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  std::optional<MCPhysReg> RenameReg;
  if (!DebugCounter::shouldExecute(RegRenamingCounter))
    return RenameReg;

  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return RenameReg;

  const bool IsLoad = FirstMI.mayLoad();

  if (!MaybeCanRename) {
    if (IsLoad)
      MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
                                                 RequiredClasses, TRI)};
    else
      MaybeCanRename = {
          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
  }

  if (*MaybeCanRename) {
    RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
                                          RequiredClasses, TRI);
  }
  return RenameReg;
}
/// Scan the instructions looking for a load/store that can be combined with
/// the current instruction into a wider store or a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator MBBIWithRenameReg;
  MachineInstr &FirstMI = *I;
  MBBI = next_nodbg(MBBI, E);

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  Register Reg = getLdStRegOp(FirstMI).getReg();
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  std::optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  UsedInBetween.init(*TRI);

  Flags.clearRenameReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read or write memory between FirstMI and
  // MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  unsigned Count = 0;
  for (; MBBI != E && Count < Limit; MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    UsedInBetween.accumulate(MI);

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
      int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // We're trying to pair instructions that differ in how they are
        // scaled. If FirstMI is scaled then scale the offset of MI
        // accordingly. Otherwise, do the opposite (i.e., make MI's offset
        // unscaled).
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together: bail and keep looking.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);

      if (BaseReg == MIBaseReg) {
        // If the offset of the second ld/st is not equal to the size of the
        // destination register it can not be paired with a pre-index ld/st
        // pair. Additionally if the base reg is used or modified the
        // operations can't be paired: bail and keep looking.
        if (IsPreLdSt) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          bool IsBaseRegModified = !ModifiedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          // If the stored value and the address of the second instruction is
          // the same, it needs to be using the updated register and therefore
          // it must not be folded.
          bool IsMIRegTheSame =
              TRI->regsOverlap(getLdStRegOp(MI).getReg(),
                               AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          if ((Offset != MIOffset + OffsetStride) &&
              (Offset + OffsetStride != MIOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          // If the alignment requirements of the scaled wide load/store
          // instruction can't express the offset of the scaled narrow input,
          // bail and keep looking. For promotable zero stores, allow only when
          // the stored value is the same (i.e., WZR).
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          // Pairwise instructions have a 7-bit signed offset field. Single
          // insns have a 12-bit unsigned offset field. If the resultant
          // immediate offset of merging these instructions is out of range for
          // a pairwise instruction, bail and keep looking.
          if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
                              << "keep looking.\n");
            continue;
          }
          // If the alignment requirements of the paired (scaled) instruction
          // can't express the offset of the unscaled input, bail and keep
          // looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs()
                       << "Offset doesn't fit due to alignment requirements, "
                       << "keep looking.\n");
            continue;
          }
        }

        // If the BaseReg has been modified, then we cannot do the
        // optimization. For example, in the following pattern
        //   ldr x1 [x2]
        //   ldr x2 [x3]
        //   ldr x4 [x2, #8],
        // the first and third ldr cannot be converted to ldp x1, x4, [x2].
        if (!ModifiedRegUnits.available(BaseReg))
          return E;

        const bool SameLoadReg =
            MayLoad &&
            TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg());

        // If the Rt of the second instruction was not modified or used between
        // the two instructions and none of the instructions between the second
        // and first alias with the second, we can combine the second into the
        // first.
        bool RtNotModified =
            ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
        bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
                           !UsedRegUnits.available(getLdStRegOp(MI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not used: "
                          << (RtNotUsed ? "true" : "false") << "\n");

        if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
          // For pairs loading into the same register, try to find a renaming
          // opportunity to allow the merge.
          if (SameLoadReg) {
            std::optional<MCPhysReg> RenameReg =
                findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
                                                Reg, DefinedInBB, UsedInBetween,
                                                RequiredClasses, TRI);
            if (!RenameReg) {
              LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                                UsedRegUnits, TRI);
              MemInsns.push_back(&MI);
              LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
                                << "keep looking.\n");
              continue;
            }
            Flags.setRenameReg(*RenameReg);
          }

          Flags.setMergeForward(false);
          if (!SameLoadReg)
            Flags.clearRenameReg();
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions between
        // the first and the second alias with the first, we can combine the
        // first into the second.
        RtNotModified = !(MayLoad && !UsedRegUnits.available(
                                         getLdStRegOp(FirstMI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
                          << "Reg '" << getLdStRegOp(FirstMI)
                          << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n");

        if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
          if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
            Flags.setMergeForward(true);
            Flags.clearRenameReg();
            return MBBI;
          }

          std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
              MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
              RequiredClasses, TRI);
          if (RenameReg) {
            Flags.setMergeForward(true);
            Flags.setRenameReg(*RenameReg);
            MBBIWithRenameReg = MBBI;
          }
        }
        LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
                          << "interference in between, keep looking.\n");
      }
    }

    if (Flags.getRenameReg())
      return MBBIWithRenameReg;

    // If the instruction wasn't a matching load or store, stop searching if we
    // encounter a call instruction that might modify memory.
    if (MI.isCall())
      return E;

    // Update modified / uses register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return E;

    // Update list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  }
  return E;
}
// If MaybeCFI is a CFA-related CFI instruction belonging to the SP update MI,
// return it so the caller can keep it next to the merged instruction;
// otherwise return the end iterator.
static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
  assert((MI.getOpcode() == AArch64::SUBXri ||
          MI.getOpcode() == AArch64::ADDXri) &&
         "Expected a register update instruction");
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      !(MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy)) ||
      MI.getOperand(0).getReg() != AArch64::SP)
    return End;

  const MachineFunction &MF = *MI.getParent()->getParent();
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
  switch (MF.getFrameInstructions()[CFIIndex].getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaOffset:
    return MaybeCFI;
  default:
    return End;
  }
}
std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
    bool IsForward, bool IsPreIdx, bool MergeEither) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);

  // Choose the insertion point. When merging an update that precedes the
  // memory instruction, a CFA-related CFI right after the update may be
  // moved past the merged instruction, but any other intervening CFI blocks
  // the merge.
  MachineBasicBlock::iterator InsertPt = I;
  if (IsForward) {
    if (MergeEither) {
      InsertPt = Update;
    } else {
      MachineBasicBlock::iterator CFI =
          maybeMoveCFI(*Update, next_nodbg(Update, E));
      if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
            return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
          }))
        return std::nullopt;
    }
  }

  MachineBasicBlock *MBB = InsertPt->getParent();

  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further.
  if (NextI == Update)
    NextI = next_nodbg(NextI, E);

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (!AArch64InstrInfo::isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }
  (void)MIB;
  (void)MBB;

  // ... (statistics and debug printing elided) ...

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
                                          MachineBasicBlock::iterator Update,
                                          unsigned Offset, int Scale) {
  assert((Update->getOpcode() == AArch64::MOVKWi) &&
         "Unexpected const mov instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
  MachineInstr &MemMI = *I;
  unsigned Mask = (1 << 12) * Scale - 1;
  unsigned Low = Offset & Mask;
  unsigned High = Offset - Low;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
  MachineInstrBuilder AddMIB, MemMIB;

  // Add IndexReg, BaseReg, High (the BaseReg may be SP).
  AddMIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
          .addDef(IndexReg)
          .addUse(BaseReg)
          .addImm(High >> 12) // shifted value
          .addImm(12);        // shift 12
  (void)AddMIB;
  // Ld/St DestReg, IndexReg, Imm12.
  unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               .add(getLdStRegOp(MemMI))
               .add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
               .addImm(Low / Scale)
               .setMemRefs(I->memoperands())
               .setMIFlags(I->mergeFlagsWith(*Update));
  (void)MemMIB;
  ++NumConstOffsetFolded;

  // ... (debug printing elided) ...

  // Erase the old instructions for the block.
  I->eraseFromParent();
  PrevI->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI.getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted value.
    if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

    // The immediate must be a multiple of the scaling factor of the pre/post
    // indexed instruction.
    int Scale, MinOffset, MaxOffset;
    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
    if (UpdateOffset % Scale != 0)
      break;

    // Scaled offset must fit in the instruction immediate.
    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
      break;

    // If we have a non-zero Offset, check that it matches the amount we're
    // adding to the register.
    if (!Offset || Offset == UpdateOffset)
      return true;
    break;
  }
  return false;
}
bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
                                                 MachineInstr &MI,
                                                 unsigned IndexReg,
                                                 unsigned &Offset) {
  // The update instruction source and destination register must be the
  // same as the load/store index register.
  if (MI.getOpcode() == AArch64::MOVKWi &&
      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {

    // movz + movk hold a large offset of a Ld/St instruction.
    MachineBasicBlock::iterator B = MI.getParent()->begin();
    MachineBasicBlock::iterator MBBI = &MI;
    // Skip the case when the MOVK is the first instruction in the block.
    if (MBBI == B)
      return false;
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MovzMI = *MBBI;
    // Make sure the MOVKWi and MOVZWi set the same register.
    if (MovzMI.getOpcode() == AArch64::MOVZWi &&
        MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
      unsigned Low = MovzMI.getOperand(1).getImm();
      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
      Offset = High + Low;
      // 12-bit optionally shifted immediates are legal for adds.
      return Offset >> 24 == 0;
    }
  }
  return false;
}
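// Illustrative match (not from this file): the sequence
//   mov  w8, #0x8               ; MOVZWi, low 16 bits
//   movk w8, #0x5, lsl #16      ; MOVKWi, high bits -> w8 = 0x50008
//   ldr  x1, [x0, x8]
// is accepted because both instructions write the same register and the
// combined offset 0x50008 fits in 24 bits, as required by
// "Offset >> 24 == 0".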
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
                         TII->getMemScale(MemMI);

  // Scan forward looking for post-index opportunities.  Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update. This does not apply to tag store instructions which
  // ignore the address part of the source register.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  MBBI = next_nodbg(MBBI, E);

  // We can't post-increment the stack pointer if any instruction between
  // the memory access (I) and the increment (MBBI) can access the memory
  // region defined by [SP, MBBI].
  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: For now, we always block the optimization over SP in windows
    // targets as it requires to adjust the unwind/debug info, messing up
    // the unwind info can actually cause a miscompile.
    return E;
  }

  MachineBasicBlock *CurMBB = I->getParent();
  // Visiting successors is only sensible when liveness is tracked.
  bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();

  unsigned Count = 0;
  do {
    for (; MBBI != CurMBB->end() && Count < Limit;
         MBBI = next_nodbg(MBBI, CurMBB->end())) {
      MachineInstr &MI = *MBBI;

      // Don't count transient instructions towards the search limit since
      // there may be different numbers of them if e.g. debug information is
      // present.
      if (!MI.isTransient())
        ++Count;

      // If we found a match, return it.
      if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
        return MBBI;

      // Update the status of what the instruction clobbered and used.
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);

      // Otherwise, if the base register is used or modified, we have no
      // match, so return early. If we are optimizing SP, do not allow
      // instructions that may load or store in between the load and the
      // optimized value update.
      if (!ModifiedRegUnits.available(BaseReg) ||
          !UsedRegUnits.available(BaseReg) ||
          (BaseRegSP && MBBI->mayLoadOrStore()))
        return E;
    }

    if (!VisitSucc || Limit <= Count)
      break;

    // Try to continue the search into a single successor in which the base
    // register is live-in and which has a unique predecessor.
    MachineBasicBlock *SuccToVisit = nullptr;
    unsigned LiveSuccCount = 0;
    for (MachineBasicBlock *Succ : CurMBB->successors()) {
      for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
        if (Succ->isLiveIn(*AI)) {
          if (LiveSuccCount++)
            return E;
          if (Succ->pred_size() == 1)
            SuccToVisit = Succ;
          break;
        }
      }
    }
    if (!SuccToVisit)
      break;
    CurMBB = SuccToVisit;
    MBBI = CurMBB->begin();
  } while (true);

  return E;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;
  MachineFunction &MF = *MemMI.getMF();

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();

  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
  Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
                        IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
                                     : AArch64::NoRegister};

  // If the load/store is the first instruction in the block, there's
  // obviously not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;
  // If the base register overlaps a destination register, we can't
  // merge the update.
  if (!isTagStore(MemMI)) {
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
      if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
        return E;
  }

  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: For now, we always block the optimization over SP in windows
    // targets as it requires to adjust the unwind/debug info, messing up
    // the unwind info can actually cause a miscompile.
    return E;
  }

  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  bool MemAccessBeforeSPPreInc = false;
  MergeEither = true;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      // Check that the update value is within our red zone limit (which may
      // be zero).
      if (MemAccessBeforeSPPreInc &&
          MBBI->getOperand(2).getImm() > RedZoneSize)
        return E;
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;

    // If the instruction accesses memory, has side effects, or touches one of
    // the destination registers, the combined instruction can only be placed
    // at the position of the memory instruction (i.e. we cannot merge either
    // way).
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
        (DestReg[0] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[0]) &&
           UsedRegUnits.available(DestReg[0]))) ||
        (DestReg[1] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[1]) &&
           UsedRegUnits.available(DestReg[1]))))
      MergeEither = false;

    // Keep track if we have a memory access before an SP pre-increment; in
    // this case we need to validate later that the update amount respects
    // the red zone.
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAccessBeforeSPPreInc = true;
  } while (MBBI != B && Count < Limit);
  return E;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
    MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  // If the load is the first instruction in the block, there's obviously
  // not any matching mov.
  if (MBBI == B)
    return E;

  // ... (index operand checks elided) ...

  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();

  // Track which register units have been modified and used between the first
  // insn (exclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the index register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(IndexReg) ||
        !UsedRegUnits.available(IndexReg))
      return E;

  } while (MBBI != B && Count < Limit);
  return E;
}
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm access.
  if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
    return false;

  // Look backward up to LdStLimit instructions.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load. Keeping the iterator straight is a pain, so we let
    // the merge routine tell us what the next instruction is after it's done
    // mucking about.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}
// Merge adjacent zero stores into a wider store.
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Look ahead up to LdStLimit instructions for a mergeable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;

    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}
// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // If the disable-ldp feature is opted, do not emit ldp.
  if (MI.mayLoad() && Subtarget->hasDisableLdp())
    return false;

  // If the disable-stp feature is opted, do not emit stp.
  if (MI.mayStore() && Subtarget->hasDisableStp())
    return false;

  // Early exit if the offset cannot possibly match.
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  // Allow one more for offset.
  if (Offset > 0)
    Offset -= OffsetStride;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead up to LdStLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
  if (Paired != E) {
    // Keep track of the instruction before the paired instruction.
    auto Prev = std::prev(MBBI);

    // Fetch the memoperand of the load/store that is a candidate for
    // combination.
    MachineMemOperand *MemOp =
        MI.memoperands_empty() ? nullptr : MI.memoperands().front();

    // If a load/store arrives and the ldp/stp-aligned-only feature is opted,
    // check that the alignment of the source pointer is at least double the
    // alignment of the type.
    if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
        (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
      // If there is no size/align information, cancel the transformation.
      if (!MemOp || !MemOp->getMemoryType().isValid()) {
        NumFailedAlignmentCheck++;
        return false;
      }

      // Get the needed alignments to check them if the
      // ldp-aligned-only/stp-aligned-only features are opted.
      uint64_t MemAlignment = MemOp->getAlign().value();
      uint64_t TypeAlignment =
          Align(MemOp->getSize().getValue().getKnownMinValue()).value();

      if (MemAlignment < 2 * TypeAlignment) {
        NumFailedAlignmentCheck++;
        return false;
      }
    }

    ++NumPairCreated;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;

    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    // Collect liveness info for instructions between Prev and the new
    // position MBBI.
    for (auto I = std::next(Prev); I != MBBI; I++)
      updateDefinedRegisters(*I, DefinedInBB, TRI);

    return true;
  }
  return false;
}
bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Look forward to try to form a post-index instruction. For example,
  //   ldr x0, [x20]
  //   add x20, x20, #32
  // merged into:
  //   ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/false,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a pre-index instruction. For example,
  //   add x0, x0, #8
  //   ldr x1, [x0]
  // merged into:
  //   ldr x1, [x0, #8]!
  bool MergeEither;
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
                                     /*IsPreIdx=*/true, MergeEither)) {
      MBBI = *NextI;
      return true;
    }
  }

  // The immediate in the load/store is scaled by the size of the memory
  // operation. The immediate in the add we're looking for, however, is not,
  // so adjust here.
  int UnscaledOffset =
      AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

  // Look forward to try to find a pre-index instruction. For example,
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  // merged into:
  //   ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/true,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  return false;
}
bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
                                              int Scale) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a constant offset for an index LdSt
  // instruction. For example,
  //   mov x8, #LargeImm   ; = a * (1<<12) + imm12
  //   ldr x1, [x0, x8]
  // merged into:
  //   add x8, x0, a * (1<<12)
  //   ldr x1, [x8, imm12]
  unsigned Offset;
  MachineBasicBlock::iterator Update =
      findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
  if (Update != E && (Offset & (Scale - 1)) == 0) {
    // Merge the imm12 into the ld/st.
    MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
    return true;
  }

  return false;
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();

  bool Modified = false;
  // 1) Find loads that directly read from stores and promote them by
  //    replacing with mov instructions. If the store is wider than the load,
  //    the load will be replaced with a bitfield extract.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 2) Merge adjacent zero stores into a wider store.
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }

  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    // Track currently live registers up to this point, to help with
    // searching for a rename register on demand.
    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 4) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 5) Find a register assigned with a const value that can be combined with
  //    the load or store.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    int Scale;
    if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
      Modified = true;
    else
      ++MBBI;
  }

  return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Resize the modified and used register unit trackers. We do this once
  // per function and then clear the register units each time we optimize a
  // load or store.
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  DefinedInBB.init(*TRI);

  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn)
    Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);

  return Modified;
}

FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}