#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));
enum class SIAtomicScope { NONE, SINGLETHREAD, WAVEFRONT, WORKGROUP, CLUSTER, AGENT, SYSTEM };
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  FLAT = GLOBAL | LDS | SCRATCH,
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
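// FLAT, ATOMIC, and ALL are unions of the individual address-space flags;
// membership tests throughout this file follow the bitmask-enum pattern, e.g.
//
//   bool TouchesLDS =
//       (AS & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE;
//
// (TouchesLDS is an illustrative name, not one used in this file.)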
class SIMemOpInfo final {
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;
  bool IsLastUse = false;
  bool IsCooperative = false;
  SIMemOpInfo(
      const GCNSubtarget &ST,
      AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
      SIAtomicScope Scope = SIAtomicScope::SYSTEM,
      SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
      SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
      bool IsCrossAddressSpaceOrdering = true,
      AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
      bool IsVolatile = false, bool IsNonTemporal = false,
      bool IsLastUse = false, bool IsCooperative = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
        OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
        IsLastUse(IsLastUse), IsCooperative(IsCooperative) {
    if (Ordering == AtomicOrdering::NotAtomic) {
      assert(!IsCooperative && "Cannot be cooperative & non-atomic!");
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;

    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
        SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
               SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
    }

    if (this->Scope == SIAtomicScope::CLUSTER && !ST.hasClusters())
      this->Scope = SIAtomicScope::AGENT;
  }
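  // A conservative reading of the constructor above: an instruction that only
  // touches SCRATCH is thread-private, so SINGLETHREAD scope suffices; adding
  // LDS widens visibility to the work-group, and adding GDS to the agent,
  // which is why Scope is clamped with std::min in each branch. CLUSTER scope
  // simply degrades to AGENT on subtargets without clusters.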
  SIAtomicScope getScope() const { return Scope; }

  AtomicOrdering getOrdering() const { return Ordering; }

  AtomicOrdering getFailureOrdering() const { return FailureOrdering; }

  SIAtomicAddrSpace getInstrAddrSpace() const { return InstrAddrSpace; }

  SIAtomicAddrSpace getOrderingAddrSpace() const { return OrderingAddrSpace; }

  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  bool isVolatile() const { return IsVolatile; }

  bool isNonTemporal() const { return IsNonTemporal; }

  bool isLastUse() const { return IsLastUse; }

  bool isCooperative() const { return IsCooperative; }

  bool isAtomic() const { return Ordering != AtomicOrdering::NotAtomic; }
};
class SIMemOpAccess final {
  const AMDGPUMachineModuleInfo *MMI = nullptr;
  const GCNSubtarget &ST;

  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI, const GCNSubtarget &ST);

  std::optional<SIMemOpInfo>
  getLoadInfo(const MachineBasicBlock::iterator &MI) const;

  std::optional<SIMemOpInfo>
  getStoreInfo(const MachineBasicBlock::iterator &MI) const;

  std::optional<SIMemOpInfo>
  getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;

  std::optional<SIMemOpInfo>
  getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;

  std::optional<SIMemOpInfo>
  getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const;
};
class SICacheControl {
protected:
  const GCNSubtarget &ST;

  const SIInstrInfo *TII = nullptr;

  SICacheControl(const GCNSubtarget &ST);

  bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const;

public:
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  virtual bool enableVolatileAndOrNonTemporal(
      MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
      bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const = 0;

  virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
    return false;
  }

  virtual bool handleCooperativeAtomic(MachineInstr &MI) const {
    llvm_unreachable(
        "cooperative atomics are not available on this architecture");
  }

  virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering, Position Pos,
                          AtomicOrdering Order, bool AtomicsOnly) const = 0;

  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  virtual ~SICacheControl() = default;
};
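// The concrete cache controls below specialize this interface per hardware
// generation: SIGfx6 is the baseline, SIGfx7 refines its acquire sequence,
// SIGfx90A/SIGfx940 extend the GFX9 behavior, and SIGfx10/SIGfx11/SIGfx12
// form the RDNA line, as their inheritance clauses show.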
class SIGfx6CacheControl : public SICacheControl {
public:
  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
  SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx940CacheControl : public SIGfx90ACacheControl {
public:
  SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx10CacheControl : public SIGfx7CacheControl {
public:
  SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
  SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;
};
class SIGfx12CacheControl : public SIGfx11CacheControl {
protected:
  bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                      SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {
    assert(!ST.hasGFX1250Insts() || ST.isCuModeEnabled());
  }

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool finalizeStore(MachineInstr &MI, bool Atomic) const override;

  bool handleCooperativeAtomic(MachineInstr &MI) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }
};
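// Note how GFX12 funnels all three cache-bypass hooks through a single
// setAtomicScope(): on this generation cache behavior is driven by the
// instruction's cpol scope field rather than per-cache bypass bits, so one
// rewrite of that operand covers loads, stores, and RMW atomics alike.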
class SIMemoryLegalizer final {
  const MachineModuleInfo &MMI;

  std::unique_ptr<SICacheControl> CC = nullptr;

  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  bool isAtomicRet(const MachineInstr &MI) const {
    return SIInstrInfo::isAtomicRet(MI);
  }

  bool removeAtomicPseudoMIs();

  bool expandLoad(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  bool expandStore(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);

  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

  bool expandLDSDMA(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

public:
  SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {}

  bool run(MachineFunction &MF);
};

class SIMemoryLegalizerLegacy final : public MachineFunctionPass {
public:
  static char ID;

  SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return PASS_NAME; }

  bool runOnMachineFunction(MachineFunction &MF) override;
};
static const StringMap<SIAtomicAddrSpace> ASNames = {{
    {"global", SIAtomicAddrSpace::GLOBAL},
    {"local", SIAtomicAddrSpace::LDS},
}};

static void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  OS << "unknown address space '" << AS << "'; expected one of ";
  ListSeparator LS;
  for (const auto &[Name, Val] : ASNames)
    OS << LS << '\'' << Name << '\'';
  // ...
}
static std::optional<SIAtomicAddrSpace>
getSynchronizeAddrSpaceMD(const MachineInstr &MI) {
  static constexpr StringLiteral FenceASPrefix = "amdgpu-synchronize-as";

  auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
  if (!MMRA)
    return std::nullopt;

  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
  for (const auto &[Prefix, Suffix] : MMRA) {
    if (Prefix != FenceASPrefix)
      continue;

    if (auto It = ASNames.find(Suffix); It != ASNames.end())
      Result |= It->second;
    else
      diagnoseUnknownMMRAASName(MI, Suffix);
  }

  if (Result == SIAtomicAddrSpace::NONE)
    return std::nullopt;

  return Result;
}
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  Func.getContext().diagnose(
      DiagnosticInfoUnsupported(Func, Msg, MI->getDebugLoc()));
}
std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getClusterSSID())
    return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getClusterOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::CLUSTER,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}
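// Each tuple above is (scope, address spaces to order, cross-address-space
// ordering): the plain sync scopes order all atomic address spaces and set
// the cross-AS flag, while the "one address space" variants restrict ordering
// to the instruction's own address spaces and clear it.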
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;
  if (AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE ||
      AS == AMDGPUAS::BUFFER_STRIDED_POINTER)
    return SIAtomicAddrSpace::GLOBAL;

  return SIAtomicAddrSpace::OTHER;
}
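// Buffer fat pointers, buffer resources, and strided buffer pointers fall
// through to GLOBAL rather than OTHER, presumably because buffer accesses
// ultimately target global memory and so need the same cache treatment.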
SIMemOpAccess::SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI_,
                             const GCNSubtarget &ST)
    : MMI(&MMI_), ST(ST) {}
std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;
  bool IsVolatile = false;
  bool IsLastUse = false;
  bool IsCooperative = false;

  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    IsLastUse |= MMO->getFlags() & MOLastUse;
    IsCooperative |= MMO->getFlags() & MOCooperative;
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(
            MI, "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
         OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) ==
         SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal, IsLastUse, IsCooperative);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && !MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(!MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return std::nullopt;

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return std::nullopt;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      *ScopeOrNone;

  if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return std::nullopt;
  }

  auto SynchronizeAS = getSynchronizeAddrSpaceMD(*MI);
  if (SynchronizeAS)
    OrderingAddrSpace = *SynchronizeAS;

  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
                     AtomicOrdering::NotAtomic);
}
std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const {
  return constructFromMIWithMMO(MI);
}
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}

bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
                                    AMDGPU::CPol::CPol Bit) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
  if (!CPol)
    return false;

  CPol->setImm(CPol->getImm() | Bit);
  return true;
}
bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const {
  assert((!ST.hasGloballyAddressableScratch() ||
          (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
          (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
         "scratch instructions should already be replaced by flat "
         "instructions if GloballyAddressableScratch is enabled");
  return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
}
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (ST.hasGFX940Insts())
    return std::make_unique<SIGfx940CacheControl>(ST);
  if (ST.hasGFX90AInsts())
    return std::make_unique<SIGfx90ACacheControl>(ST);
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX11)
    return std::make_unique<SIGfx10CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx11CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}
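// The feature tests must run before the generation comparisons: gfx90a and
// gfx940-class targets report a GFX9 generation, so checking the generation
// first would incorrectly hand them the plain GFX7 cache control.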
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}
bool SIGfx6CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  // ...
}

bool SIGfx6CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  // ...
}
bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse) const {
  // Only loads and stores are handled here; atomic read-modify-write
  // instructions take a different path.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    if (Op == SIMemOp::LOAD)
      // ...

    // Ensure the operation completes before subsequent operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          /*AtomicsOnly=*/false);
  }

  if (IsNonTemporal) {
    // ...
  }

  return Changed;
}
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering, Position Pos,
                                    AtomicOrdering Order,
                                    bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (/* ... && */ Scope == SIAtomicScope::WORKGROUP &&
      (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    // ...
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
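// S_WAITCNT packs several counters into one immediate: encodeWaitcnt()
// combines the vmcnt, expcnt, and lgkmcnt fields for the target's ISA
// version, where a zero count means "wait until fully drained" and the
// get*BitMask() helpers give the "don't wait" maximum for each field.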
bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                    IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                    /*AtomicsOnly=*/false);
}
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const unsigned InvalidateL1 = ST.isAmdPalOS() || ST.isMesa3DOS()
                                    ? AMDGPU::BUFFER_WBINVL1
                                    : AMDGPU::BUFFER_WBINVL1_VOL;

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx90ACacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
      // In threadgroup-split mode the waves of a work-group can be running on
      // different CUs, so the per-CU L1 must be bypassed as well.
      if (ST.isTgSplitEnabled())
        // ...
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}

bool SIGfx90ACacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}
bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    if (Op == SIMemOp::LOAD)
      // ...

    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          /*AtomicsOnly=*/false);
  }

  if (IsNonTemporal) {
    // ...
  }

  return Changed;
}
bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsCrossAddrSpaceOrdering,
                                      Position Pos, AtomicOrdering Order,
                                      bool AtomicsOnly) const {
  if (ST.isTgSplitEnabled()) {
    // In threadgroup-split mode, waves of a work-group may execute on
    // different CUs, so workgroup-scope waits on the global, scratch, or GDS
    // address spaces must be widened to agent scope. LDS is dropped, since it
    // cannot be allocated in this mode.
    if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
                       SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
        (Scope == SIAtomicScope::WORKGROUP)) {
      Scope = SIAtomicScope::AGENT;
    }

    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }
  return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
                                        IsCrossAddrSpaceOrdering, Pos, Order,
                                        AtomicsOnly);
}
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
      // In threadgroup-split mode the waves of a work-group can be on
      // different CUs, so the per-CU L1 needs the same treatment as agent
      // scope.
      if (ST.isTgSplitEnabled()) {
        Scope = SIAtomicScope::AGENT;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);

  return Changed;
}
bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace,
                                               IsCrossAddrSpaceOrdering, Pos);

  return Changed;
}
bool SIGfx940CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
      // ...
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}

bool SIGfx940CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
      // ...
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}

bool SIGfx940CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}
bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // ...

    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          /*AtomicsOnly=*/false);
  }

  if (IsNonTemporal) {
    // ...
  }

  return Changed;
}
bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
      if (ST.isTgSplitEnabled()) {
        // ...
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                        /*AtomicsOnly=*/false);

  return Changed;
}
bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 has to be bypassed; in CU mode it does
      // not.
      if (!ST.isCuModeEnabled())
        // ...
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}
bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    if (Op == SIMemOp::LOAD) {
      // ...
    }

    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          /*AtomicsOnly=*/false);
  }

  if (IsNonTemporal) {
    if (Op == SIMemOp::STORE)
      // ...

    // ...
  }

  return Changed;
}
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos, AtomicOrdering Order,
                                     bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be on either CU of the WGP,
      // and the L0 caches are per-CU, so a wait is still required.
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // ...

  if (/* ... && */ Scope == SIAtomicScope::WORKGROUP &&
      (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    // ...
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the per-CU L0 caches must be invalidated; in CU mode all
      // waves of the work-group share an L0, so nothing is needed.
      if (!ST.isCuModeEnabled()) {
        // ...
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx11CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be on either CU of the WGP,
      // so the L0 has to be bypassed unless CU mode is enabled.
      if (!ST.isCuModeEnabled())
        // ...
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}
bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    if (Op == SIMemOp::LOAD)
      // ...

    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          /*AtomicsOnly=*/false);
  }

  if (IsNonTemporal) {
    if (Op == SIMemOp::STORE)
      // ...

    // ...
  }

  return Changed;
}
bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI,
                                AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;
  // ...
}

bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
                                   AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;
  // ...
}
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
    const MachineBasicBlock::iterator MI) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (ST.hasImageInsts()) {
    // ...
  }

  return Changed;
}
bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos, AtomicOrdering Order,
                                     bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be on either CU of the WGP,
      // so a wait is needed unless a shared L0 is guaranteed.
      if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts() /* || ... */) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // ...

  if (!AtomicsOnly && ST.hasImageInsts()) {
    // ...
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // ...

  if (!canAffectGlobalAddrSpace(AddrSpace))
    return false;

  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    // ...
  case SIAtomicScope::AGENT:
    // ...
  case SIAtomicScope::CLUSTER:
    // ...
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the waves of a work-group can be executing on either CU of
    // the WGP, so the L0 needs invalidating; in CU mode it does not.
    if (ST.isCuModeEnabled())
      return false;
    // ...
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to invalidate.
    return false;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  if (Pos == Position::AFTER)
    ++MI;

  // ...

  if (Pos == Position::AFTER)
    --MI;

  return true;
}
bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        bool IsCrossAddrSpaceOrdering,
                                        Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    if (Pos == Position::AFTER)
      ++MI;

    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
      if (ST.hasGFX1250Insts()) {
        // ...
      }
      break;
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
      // ...
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }

    if (Pos == Position::AFTER)
      --MI;
  }

  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                        /*AtomicsOnly=*/false);

  return Changed;
}
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse) const {
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsLastUse) {
    // ...
  } else if (IsNonTemporal) {
    // ...
  }

  if (IsVolatile) {
    // ...

    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          /*AtomicsOnly=*/false);
  }

  return Changed;
}
bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
  assert(MI.mayStore() && "Not a Store inst");
  const bool IsRMW = (MI.mayLoad() && MI.mayStore());
  bool Changed = false;

  // ...

  if (Atomic && ST.requiresWaitXCntBeforeAtomicStores() && TII->isFLAT(MI)) {
    MachineBasicBlock &MBB = *MI.getParent();
    // ...
  }

  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  if (!CPol)
    return Changed;

  // ...

  if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic /* && ... */)
    Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());

  return Changed;
}
bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &MI) const {
  if (!ST.hasGFX1250Insts())
    return SICacheControl::handleCooperativeAtomic(MI);

  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  assert(CPol && "No CPol operand?");
  // ...
}
bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // ...
    case SIAtomicScope::AGENT:
      // ...
    case SIAtomicScope::CLUSTER:
      // ...
    case SIAtomicScope::WORKGROUP:
      if (!ST.isCuModeEnabled())
        // ...
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // ...
    }
  }

  return Changed;
}
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order,
                                /*AtomicsOnly=*/false);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |=
          CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                         SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
                         Position::AFTER, Order, /*AtomicsOnly=*/true);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Non-atomic loads only need volatile/nontemporal/last-use treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());

  return Changed;
}
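// Summary of the atomic-load lowering above: cache bypass is applied for any
// ordering at least monotonic, a full wait precedes seq_cst loads, and for
// acquire or stronger a wait plus cache invalidate follows the load so that
// subsequent reads observe writes made visible by the paired release.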
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  MachineInstr &StoreMI = *MI;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
                                            MOI.getOrderingAddrSpace());
    }

    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/true);
    return Changed;
  }

  // Non-atomic stores only need volatile/nontemporal treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
      MOI.isNonTemporal());

  Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/false);
  return Changed;
}
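// Stores are the mirror image of loads: the release (waits plus any cache
// writeback) is inserted BEFORE the store so earlier writes become visible
// first, and finalizeStore() then applies target-specific fixups, with the
// Atomic flag distinguishing the two call sites above.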
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Acquire) {
      Changed |= CC->insertWait(MI, MOI.getScope(), OrderingAddrSpace,
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order, /*AtomicsOnly=*/true);
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
    const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  MachineInstr &RMWMI = *MI;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
                                          MOI.getInstrAddrSpace());
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |=
          CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                         isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE,
                         MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER,
                         Order, /*AtomicsOnly=*/true);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    Changed |= CC->finalizeStore(RMWMI, /*Atomic=*/true);
    return Changed;
  }

  return Changed;
}
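// For cmpxchg/RMW the trailing wait keys off isAtomicRet(): an atomic that
// returns a value must wait on the load counter, while a non-returning one
// waits on the store counter, before the acquire invalidate runs.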
bool SIMemoryLegalizer::expandLDSDMA(const SIMemOpInfo &MOI,
                                     MachineBasicBlock::iterator &MI) {
  assert(SIInstrInfo::isLDSDMA(*MI));

  // LDS DMA is both a load and a store; pick the kind matching the transfer
  // direction.
  SIMemOp OpKind =
      SIInstrInfo::mayWriteLDSThroughDMA(*MI) ? SIMemOp::LOAD : SIMemOp::STORE;

  return CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());
}
bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
  const MachineModuleInfo &MMI =
      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  return SIMemoryLegalizer(MMI).run(MF);
}

PreservedAnalyses
SIMemoryLegalizerPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  auto *MMI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
                  .getCachedResult<MachineModuleAnalysis>(
                      *MF.getFunction().getParent());
  assert(MMI && "MachineModuleAnalysis must be available");
  if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
bool SIMemoryLegalizer::run(MachineFunction &MF) {
  bool Changed = false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>(), ST);
  CC = SICacheControl::create(ST);

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Unbundle instructions after the post-RA scheduler.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        MI->eraseFromParent();
        MI = II->getIterator();
      }

      if (const auto &MOI = MOA.getLoadInfo(MI)) {
        Changed |= expandLoad(*MOI, MI);
      } else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
      } else if (const auto &MOI = MOA.getLDSDMAInfo(MI)) {
        Changed |= expandLDSDMA(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) {
        Changed |= expandAtomicFence(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) {
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
      }
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
char SIMemoryLegalizerLegacy::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizerLegacy::ID;

INITIALIZE_PASS(SIMemoryLegalizerLegacy, DEBUG_TYPE, PASS_NAME, false, false)

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizerLegacy();
}