LLVM 23.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 /// Callback type for emitting a cmpxchg instruction during RMW expansion.
71 /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
72 /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
73 /// MetadataSrc)
74 using CreateCmpXchgInstFun = function_ref<void(
76 SyncScope::ID, bool, Value *&, Value *&, Instruction *)>;
77
78 void handleFailure(Instruction &FailedInst, const Twine &Msg,
79 Instruction *DiagnosticInst = nullptr) const {
80 LLVMContext &Ctx = FailedInst.getContext();
81
82 // TODO: Do not use generic error type.
83 Ctx.emitError(DiagnosticInst ? DiagnosticInst : &FailedInst, Msg);
84
85 if (!FailedInst.getType()->isVoidTy())
86 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
87 FailedInst.eraseFromParent();
88 }
89
90 template <typename Inst>
91 void handleUnsupportedAtomicSize(Inst *I, const Twine &AtomicOpName,
92 Instruction *DiagnosticInst = nullptr) const;
93
94 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
95 bool tryInsertTrailingSeqCstFence(Instruction *AtomicI);
96 template <typename AtomicInst>
97 bool tryInsertFencesForAtomic(AtomicInst *AtomicI, bool OrderingRequiresFence,
98 AtomicOrdering NewOrdering);
99 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
100 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
101 bool tryExpandAtomicLoad(LoadInst *LI);
102 bool expandAtomicLoadToLL(LoadInst *LI);
103 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
104 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
105 bool tryExpandAtomicStore(StoreInst *SI);
106 void expandAtomicStoreToXChg(StoreInst *SI);
107 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
108 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
109 Value *
110 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
111 Align AddrAlign, AtomicOrdering MemOpOrder,
112 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
113 void expandAtomicOpToLLSC(
114 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
115 AtomicOrdering MemOpOrder,
116 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
117 void expandPartwordAtomicRMW(
119 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
120 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
121 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
122 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
123
124 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
125 Value *insertRMWCmpXchgLoop(
126 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
127 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
128 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
129 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
130 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
131
132 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
133 bool isIdempotentRMW(AtomicRMWInst *RMWI);
134 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
135
136 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
137 Value *PointerOperand, Value *ValueOperand,
138 Value *CASExpected, AtomicOrdering Ordering,
139 AtomicOrdering Ordering2,
140 ArrayRef<RTLIB::Libcall> Libcalls);
141 void expandAtomicLoadToLibcall(LoadInst *LI);
142 void expandAtomicStoreToLibcall(StoreInst *LI);
143 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
144 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
145 const Twine &AtomicOpName = "cmpxchg",
146 Instruction *DiagnosticInst = nullptr);
147
148 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
149 CreateCmpXchgInstFun CreateCmpXchg);
150
151 bool processAtomicInstr(Instruction *I);
152
153public:
154 bool run(Function &F,
155 const LibcallLoweringModuleAnalysisResult &LibcallResult,
156 const TargetMachine *TM);
157};
158
159class AtomicExpandLegacy : public FunctionPass {
160public:
161 static char ID; // Pass identification, replacement for typeid
162
163 AtomicExpandLegacy() : FunctionPass(ID) {}
164
165 void getAnalysisUsage(AnalysisUsage &AU) const override {
168 }
169
170 bool runOnFunction(Function &F) override;
171};
172
173// IRBuilder to be used for replacement atomic instructions.
174struct ReplacementIRBuilder
175 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
176 MDNode *MMRAMD = nullptr;
177 MDNode *PCSectionsMD = nullptr;
178
179 // Preserves the DebugLoc from I, and preserves still valid metadata.
180 // Enable StrictFP builder mode when appropriate.
181 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
182 : IRBuilder(
183 I->getContext(), InstSimplifyFolder(DL),
184 IRBuilderCallbackInserter([this](Instruction *I) { addMD(I); })) {
185 SetInsertPoint(I);
186 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
187 this->setIsFPConstrained(true);
188
189 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
190 PCSectionsMD = I->getMetadata(LLVMContext::MD_pcsections);
191 }
192
193 void addMD(Instruction *I) {
195 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
196 I->setMetadata(LLVMContext::MD_pcsections, PCSectionsMD);
197 }
198};
199
200} // end anonymous namespace
201
202char AtomicExpandLegacy::ID = 0;
203
204char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
205
207 "Expand Atomic instructions", false, false)
210INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
211 "Expand Atomic instructions", false, false)
212
213// Helper functions to retrieve the size of atomic instructions.
214static unsigned getAtomicOpSize(LoadInst *LI) {
215 const DataLayout &DL = LI->getDataLayout();
216 return DL.getTypeStoreSize(LI->getType());
217}
218
219static unsigned getAtomicOpSize(StoreInst *SI) {
220 const DataLayout &DL = SI->getDataLayout();
221 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
222}
223
224static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
225 const DataLayout &DL = RMWI->getDataLayout();
226 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
227}
228
229static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
230 const DataLayout &DL = CASI->getDataLayout();
231 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
232}
233
234/// Copy metadata that's safe to preserve when widening atomics.
236 const Instruction &Source) {
238 Source.getAllMetadata(MD);
239 LLVMContext &Ctx = Dest.getContext();
240 MDBuilder MDB(Ctx);
241
242 for (auto [ID, N] : MD) {
243 switch (ID) {
244 case LLVMContext::MD_dbg:
245 case LLVMContext::MD_tbaa:
246 case LLVMContext::MD_tbaa_struct:
247 case LLVMContext::MD_alias_scope:
248 case LLVMContext::MD_noalias:
249 case LLVMContext::MD_noalias_addrspace:
250 case LLVMContext::MD_access_group:
251 case LLVMContext::MD_mmra:
252 Dest.setMetadata(ID, N);
253 break;
254 default:
255 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
256 Dest.setMetadata(ID, N);
257 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
258 Dest.setMetadata(ID, N);
259
260 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
261 // uses.
262 break;
263 }
264 }
265}
266
267template <typename Inst>
268static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
269 unsigned Size = getAtomicOpSize(I);
270 Align Alignment = I->getAlign();
271 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
272 return Alignment >= Size && Size <= MaxSize;
273}
274
275template <typename Inst>
277 raw_ostream &OS) {
278 unsigned Size = getAtomicOpSize(I);
279 Align Alignment = I->getAlign();
280 bool NeedSeparator = false;
281
282 if (Alignment < Size) {
283 OS << "instruction alignment " << Alignment.value()
284 << " is smaller than the required " << Size
285 << "-byte alignment for this atomic operation";
286 NeedSeparator = true;
287 }
288
289 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
290 if (Size > MaxSize) {
291 if (NeedSeparator)
292 OS << "; ";
293 OS << "target supports atomics up to " << MaxSize
294 << " bytes, but this atomic accesses " << Size << " bytes";
295 }
296}
297
298template <typename Inst>
299void AtomicExpandImpl::handleUnsupportedAtomicSize(
300 Inst *I, const Twine &AtomicOpName, Instruction *DiagnosticInst) const {
301 assert(!atomicSizeSupported(TLI, I) && "expected unsupported atomic size");
302 SmallString<128> FailureReason;
303 raw_svector_ostream OS(FailureReason);
305 handleFailure(*I, Twine("unsupported ") + AtomicOpName + ": " + FailureReason,
306 DiagnosticInst);
307}
308
309bool AtomicExpandImpl::tryInsertTrailingSeqCstFence(Instruction *AtomicI) {
311 return false;
312
313 IRBuilder Builder(AtomicI);
314 if (auto *TrailingFence = TLI->emitTrailingFence(
315 Builder, AtomicI, AtomicOrdering::SequentiallyConsistent)) {
316 TrailingFence->moveAfter(AtomicI);
317 return true;
318 }
319 return false;
320}
321
322template <typename AtomicInst>
323bool AtomicExpandImpl::tryInsertFencesForAtomic(AtomicInst *AtomicI,
324 bool OrderingRequiresFence,
325 AtomicOrdering NewOrdering) {
326 bool ShouldInsertFences = TLI->shouldInsertFencesForAtomic(AtomicI);
327 if (OrderingRequiresFence && ShouldInsertFences) {
328 AtomicOrdering FenceOrdering = AtomicI->getOrdering();
329 AtomicI->setOrdering(NewOrdering);
330 return bracketInstWithFences(AtomicI, FenceOrdering);
331 }
332 if (!ShouldInsertFences)
333 return tryInsertTrailingSeqCstFence(AtomicI);
334 return false;
335}
336
337bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
338 if (auto *LI = dyn_cast<LoadInst>(I)) {
339 if (!LI->isAtomic())
340 return false;
341
342 if (!atomicSizeSupported(TLI, LI)) {
343 expandAtomicLoadToLibcall(LI);
344 return true;
345 }
346
347 bool MadeChange = false;
348 if (TLI->shouldCastAtomicLoadInIR(LI) ==
349 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
350 LI = convertAtomicLoadToIntegerType(LI);
351 MadeChange = true;
352 }
353
354 MadeChange |= tryInsertFencesForAtomic(
355 LI, isAcquireOrStronger(LI->getOrdering()), AtomicOrdering::Monotonic);
356
357 MadeChange |= tryExpandAtomicLoad(LI);
358 return MadeChange;
359 }
360
361 if (auto *SI = dyn_cast<StoreInst>(I)) {
362 if (!SI->isAtomic())
363 return false;
364
365 if (!atomicSizeSupported(TLI, SI)) {
366 expandAtomicStoreToLibcall(SI);
367 return true;
368 }
369
370 bool MadeChange = false;
371 if (TLI->shouldCastAtomicStoreInIR(SI) ==
372 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
373 SI = convertAtomicStoreToIntegerType(SI);
374 MadeChange = true;
375 }
376
377 MadeChange |= tryInsertFencesForAtomic(
378 SI, isReleaseOrStronger(SI->getOrdering()), AtomicOrdering::Monotonic);
379
380 MadeChange |= tryExpandAtomicStore(SI);
381 return MadeChange;
382 }
383
384 if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
385 if (!atomicSizeSupported(TLI, RMWI)) {
386 expandAtomicRMWToLibcall(RMWI);
387 return true;
388 }
389
390 bool MadeChange = false;
391 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
392 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
393 RMWI = convertAtomicXchgToIntegerType(RMWI);
394 MadeChange = true;
395 }
396
397 MadeChange |= tryInsertFencesForAtomic(
398 RMWI,
399 isReleaseOrStronger(RMWI->getOrdering()) ||
400 isAcquireOrStronger(RMWI->getOrdering()),
402
403 // There are two different ways of expanding RMW instructions:
404 // - into a load if it is idempotent
405 // - into a Cmpxchg/LL-SC loop otherwise
406 // we try them in that order.
407 MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
408 tryExpandAtomicRMW(RMWI);
409 return MadeChange;
410 }
411
412 if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
413 if (!atomicSizeSupported(TLI, CASI)) {
414 expandAtomicCASToLibcall(CASI);
415 return true;
416 }
417
418 // TODO: when we're ready to make the change at the IR level, we can
419 // extend convertCmpXchgToInteger for floating point too.
420 bool MadeChange = false;
421 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
422 // TODO: add a TLI hook to control this so that each target can
423 // convert to lowering the original type one at a time.
424 CASI = convertCmpXchgToIntegerType(CASI);
425 MadeChange = true;
426 }
427
428 auto CmpXchgExpansion = TLI->shouldExpandAtomicCmpXchgInIR(CASI);
429 if (TLI->shouldInsertFencesForAtomic(CASI)) {
430 if (CmpXchgExpansion == TargetLoweringBase::AtomicExpansionKind::None &&
431 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
432 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
433 isAcquireOrStronger(CASI->getFailureOrdering()))) {
434 // If a compare and swap is lowered to LL/SC, we can do smarter fence
435 // insertion, with a stronger one on the success path than on the
436 // failure path. As a result, fence insertion is directly done by
437 // expandAtomicCmpXchg in that case.
438 AtomicOrdering FenceOrdering = CASI->getMergedOrdering();
439 AtomicOrdering CASOrdering =
441 CASI->setSuccessOrdering(CASOrdering);
442 CASI->setFailureOrdering(CASOrdering);
443 MadeChange |= bracketInstWithFences(CASI, FenceOrdering);
444 }
445 } else if (CmpXchgExpansion !=
446 TargetLoweringBase::AtomicExpansionKind::LLSC) {
447 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
448 MadeChange |= tryInsertTrailingSeqCstFence(CASI);
449 }
450
451 MadeChange |= tryExpandAtomicCmpXchg(CASI);
452 return MadeChange;
453 }
454
455 return false;
456}
457
458bool AtomicExpandImpl::run(
459 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
460 const TargetMachine *TM) {
461 const auto *Subtarget = TM->getSubtargetImpl(F);
462 if (!Subtarget->enableAtomicExpand())
463 return false;
464 TLI = Subtarget->getTargetLowering();
465 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
466 DL = &F.getDataLayout();
467
468 bool MadeChange = false;
469
470 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
471 BasicBlock *BB = &*BBI;
472
474
475 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
476 I = Next) {
477 Instruction &Inst = *I;
478 Next = std::next(I);
479
480 if (processAtomicInstr(&Inst)) {
481 MadeChange = true;
482
483 // New blocks may have been inserted.
484 BBE = F.end();
485 }
486 }
487 }
488
489 return MadeChange;
490}
491
492bool AtomicExpandLegacy::runOnFunction(Function &F) {
493
494 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
495 if (!TPC)
496 return false;
497 auto *TM = &TPC->getTM<TargetMachine>();
498
499 const LibcallLoweringModuleAnalysisResult &LibcallResult =
500 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
501 AtomicExpandImpl AE;
502 return AE.run(F, LibcallResult, TM);
503}
504
506 return new AtomicExpandLegacy();
507}
508
511 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
512
513 const LibcallLoweringModuleAnalysisResult *LibcallResult =
514 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
515
516 if (!LibcallResult) {
517 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
518 "' analysis required");
519 return PreservedAnalyses::all();
520 }
521
522 AtomicExpandImpl AE;
523
524 bool Changed = AE.run(F, *LibcallResult, TM);
525 if (!Changed)
526 return PreservedAnalyses::all();
527
529}
530
531bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
532 AtomicOrdering Order) {
533 ReplacementIRBuilder Builder(I, *DL);
534
535 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
536
537 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
538 // We have a guard here because not every atomic operation generates a
539 // trailing fence.
540 if (TrailingFence)
541 TrailingFence->moveAfter(I);
542
543 return (LeadingFence || TrailingFence);
544}
545
546/// Get the iX type with the same bitwidth as T.
548AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
549 EVT VT = TLI->getMemValueType(DL, T);
550 unsigned BitWidth = VT.getStoreSizeInBits();
551 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
552 return IntegerType::get(T->getContext(), BitWidth);
553}
554
555/// Convert an atomic load of a non-integral type to an integer load of the
556/// equivalent bitwidth. See the function comment on
557/// convertAtomicStoreToIntegerType for background.
558LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
559 auto *M = LI->getModule();
560 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
561
562 ReplacementIRBuilder Builder(LI, *DL);
563
564 Value *Addr = LI->getPointerOperand();
565
566 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
567 NewLI->setAlignment(LI->getAlign());
568 NewLI->setVolatile(LI->isVolatile());
569 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
570 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
571
572 Value *NewVal = LI->getType()->isPtrOrPtrVectorTy()
573 ? Builder.CreateIntToPtr(NewLI, LI->getType())
574 : Builder.CreateBitCast(NewLI, LI->getType());
575 LI->replaceAllUsesWith(NewVal);
576 LI->eraseFromParent();
577 return NewLI;
578}
579
580AtomicRMWInst *
581AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
583
584 auto *M = RMWI->getModule();
585 Type *NewTy =
586 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
587
588 ReplacementIRBuilder Builder(RMWI, *DL);
589
590 Value *Addr = RMWI->getPointerOperand();
591 Value *Val = RMWI->getValOperand();
592 Value *NewVal = Val->getType()->isPointerTy()
593 ? Builder.CreatePtrToInt(Val, NewTy)
594 : Builder.CreateBitCast(Val, NewTy);
595
596 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
597 RMWI->getAlign(), RMWI->getOrdering(),
598 RMWI->getSyncScopeID());
599 NewRMWI->setVolatile(RMWI->isVolatile());
600 copyMetadataForAtomic(*NewRMWI, *RMWI);
601 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
602
603 Value *NewRVal = RMWI->getType()->isPointerTy()
604 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
605 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
606 RMWI->replaceAllUsesWith(NewRVal);
607 RMWI->eraseFromParent();
608 return NewRMWI;
609}
610
611bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
612 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
613 case TargetLoweringBase::AtomicExpansionKind::None:
614 return false;
615 case TargetLoweringBase::AtomicExpansionKind::LLSC:
616 expandAtomicOpToLLSC(
617 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
618 LI->getOrdering(),
619 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
620 return true;
621 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
622 return expandAtomicLoadToLL(LI);
623 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
624 return expandAtomicLoadToCmpXchg(LI);
625 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
626 LI->setAtomic(AtomicOrdering::NotAtomic);
627 return true;
628 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
629 TLI->emitExpandAtomicLoad(LI);
630 return true;
631 default:
632 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
633 }
634}
635
636bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
637 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
638 case TargetLoweringBase::AtomicExpansionKind::None:
639 return false;
640 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
641 TLI->emitExpandAtomicStore(SI);
642 return true;
643 case TargetLoweringBase::AtomicExpansionKind::Expand:
644 expandAtomicStoreToXChg(SI);
645 return true;
646 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
647 SI->setAtomic(AtomicOrdering::NotAtomic);
648 return true;
649 default:
650 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
651 }
652}
653
654bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
655 ReplacementIRBuilder Builder(LI, *DL);
656
657 // On some architectures, load-linked instructions are atomic for larger
658 // sizes than normal loads. For example, the only 64-bit load guaranteed
659 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
660 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
661 LI->getPointerOperand(), LI->getOrdering());
663
664 LI->replaceAllUsesWith(Val);
665 LI->eraseFromParent();
666
667 return true;
668}
669
670bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
671 ReplacementIRBuilder Builder(LI, *DL);
672 AtomicOrdering Order = LI->getOrdering();
673 if (Order == AtomicOrdering::Unordered)
674 Order = AtomicOrdering::Monotonic;
675
676 Value *Addr = LI->getPointerOperand();
677 Type *Ty = LI->getType();
678
679 // cmpxchg supports only integer and pointer operands. If the load type is
680 // FP or vector, run the cmpxchg on the same-sized integer and bitcast the
681 // result back; mirrors createCmpXchgInstFun.
682 bool NeedBitcast = Ty->isFloatingPointTy() || Ty->isVectorTy();
683 Type *CmpXchgTy = Ty;
684 if (NeedBitcast)
685 CmpXchgTy = Builder.getIntNTy(Ty->getPrimitiveSizeInBits());
686 Constant *DummyVal = Constant::getNullValue(CmpXchgTy);
687
688 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
689 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
691 LI->getSyncScopeID());
692 Pair->setVolatile(LI->isVolatile());
693 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
694 if (NeedBitcast)
695 Loaded = Builder.CreateBitCast(Loaded, Ty);
696
697 LI->replaceAllUsesWith(Loaded);
698 LI->eraseFromParent();
699
700 return true;
701}
702
703/// Convert an atomic store of a non-integral type to an integer store of the
704/// equivalent bitwidth. We used to not support floating point or vector
705/// atomics in the IR at all. The backends learned to deal with the bitcast
706/// idiom because that was the only way of expressing the notion of a atomic
707/// float or vector store. The long term plan is to teach each backend to
708/// instruction select from the original atomic store, but as a migration
709/// mechanism, we convert back to the old format which the backends understand.
710/// Each backend will need individual work to recognize the new format.
711StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
712 ReplacementIRBuilder Builder(SI, *DL);
713 auto *M = SI->getModule();
714 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
715 M->getDataLayout());
716 Value *NewVal = SI->getValueOperand()->getType()->isPtrOrPtrVectorTy()
717 ? Builder.CreatePtrToInt(SI->getValueOperand(), NewTy)
718 : Builder.CreateBitCast(SI->getValueOperand(), NewTy);
719
720 Value *Addr = SI->getPointerOperand();
721
722 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
723 NewSI->setAlignment(SI->getAlign());
724 NewSI->setVolatile(SI->isVolatile());
725 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
726 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
727 SI->eraseFromParent();
728 return NewSI;
729}
730
731void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
732 // This function is only called on atomic stores that are too large to be
733 // atomic if implemented as a native store. So we replace them by an
734 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
735 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
736 // It is the responsibility of the target to only signal expansion via
737 // shouldExpandAtomicRMW in cases where this is required and possible.
738 ReplacementIRBuilder Builder(SI, *DL);
739 AtomicOrdering Ordering = SI->getOrdering();
740 assert(Ordering != AtomicOrdering::NotAtomic);
741 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
742 ? AtomicOrdering::Monotonic
743 : Ordering;
744 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
745 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
746 SI->getAlign(), RMWOrdering, SI->getSyncScopeID());
747 AI->setVolatile(SI->isVolatile());
748 SI->eraseFromParent();
749
750 // Now we have an appropriate swap instruction, lower it as usual.
751 tryExpandAtomicRMW(AI);
752}
753
754static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
755 Value *Loaded, Value *NewVal, Align AddrAlign,
756 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
757 bool IsVolatile, Value *&Success,
758 Value *&NewLoaded, Instruction *MetadataSrc) {
759 Type *OrigTy = NewVal->getType();
760
761 // This code can go away when cmpxchg supports FP and vector types.
762 assert(!OrigTy->isPointerTy());
763 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
764 if (NeedBitcast) {
765 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
766 NewVal = Builder.CreateBitCast(NewVal, IntTy);
767 Loaded = Builder.CreateBitCast(Loaded, IntTy);
768 }
769
770 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
771 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
773 Pair->setVolatile(IsVolatile);
774 if (MetadataSrc)
775 copyMetadataForAtomic(*Pair, *MetadataSrc);
776
777 Success = Builder.CreateExtractValue(Pair, 1, "success");
778 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
779
780 if (NeedBitcast)
781 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
782}
783
784bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
785 LLVMContext &Ctx = AI->getModule()->getContext();
786 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
787 switch (Kind) {
788 case TargetLoweringBase::AtomicExpansionKind::None:
789 return false;
790 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
791 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
792 unsigned ValueSize = getAtomicOpSize(AI);
793 if (ValueSize < MinCASSize) {
794 expandPartwordAtomicRMW(AI,
795 TargetLoweringBase::AtomicExpansionKind::LLSC);
796 } else {
797 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
798 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
799 AI->getValOperand());
800 };
801 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
802 AI->getAlign(), AI->getOrdering(), PerformOp);
803 }
804 return true;
805 }
806 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
807 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
808 unsigned ValueSize = getAtomicOpSize(AI);
809 if (ValueSize < MinCASSize) {
810 expandPartwordAtomicRMW(AI,
811 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
812 } else {
814 Ctx.getSyncScopeNames(SSNs);
815 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
816 ? "system"
817 : SSNs[AI->getSyncScopeID()];
818 OptimizationRemarkEmitter ORE(AI->getFunction());
819 ORE.emit([&]() {
820 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
821 << "A compare and swap loop was generated for an atomic "
822 << AI->getOperationName(AI->getOperation()) << " operation at "
823 << MemScope << " memory scope";
824 });
825 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
826 }
827 return true;
828 }
829 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
830 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
831 unsigned ValueSize = getAtomicOpSize(AI);
832 if (ValueSize < MinCASSize) {
834 // Widen And/Or/Xor and give the target another chance at expanding it.
837 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
838 return true;
839 }
840 }
841 expandAtomicRMWToMaskedIntrinsic(AI);
842 return true;
843 }
844 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
846 return true;
847 }
848 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
850 return true;
851 }
852 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
853 return lowerAtomicRMWInst(AI);
854 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
855 TLI->emitExpandAtomicRMW(AI);
856 return true;
857 default:
858 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
859 }
860}
861
862namespace {
863
864struct PartwordMaskValues {
865 // These three fields are guaranteed to be set by createMaskInstrs.
866 Type *WordType = nullptr;
867 Type *ValueType = nullptr;
868 Type *IntValueType = nullptr;
869 Value *AlignedAddr = nullptr;
870 Align AlignedAddrAlignment;
871 // The remaining fields can be null.
872 Value *ShiftAmt = nullptr;
873 Value *Mask = nullptr;
874 Value *Inv_Mask = nullptr;
875};
876
877[[maybe_unused]]
878raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
879 auto PrintObj = [&O](auto *V) {
880 if (V)
881 O << *V;
882 else
883 O << "nullptr";
884 O << '\n';
885 };
886 O << "PartwordMaskValues {\n";
887 O << " WordType: ";
888 PrintObj(PMV.WordType);
889 O << " ValueType: ";
890 PrintObj(PMV.ValueType);
891 O << " AlignedAddr: ";
892 PrintObj(PMV.AlignedAddr);
893 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
894 O << " ShiftAmt: ";
895 PrintObj(PMV.ShiftAmt);
896 O << " Mask: ";
897 PrintObj(PMV.Mask);
898 O << " Inv_Mask: ";
899 PrintObj(PMV.Inv_Mask);
900 O << "}\n";
901 return O;
902}
903
904} // end anonymous namespace
905
906/// This is a helper function which builds instructions to provide
907/// values necessary for partword atomic operations. It takes an
908/// incoming address, Addr, and ValueType, and constructs the address,
909/// shift-amounts and masks needed to work with a larger value of size
910/// WordSize.
911///
912/// AlignedAddr: Addr rounded down to a multiple of WordSize
913///
914/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
915/// from AlignAddr for it to have the same value as if
916/// ValueType was loaded from Addr.
917///
918/// Mask: Value to mask with the value loaded from AlignAddr to
919/// include only the part that would've been loaded from Addr.
920///
921/// Inv_Mask: The inverse of Mask.
922static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
924 Value *Addr, Align AddrAlign,
925 unsigned MinWordSize) {
926 PartwordMaskValues PMV;
927
928 Module *M = I->getModule();
929 LLVMContext &Ctx = M->getContext();
930 const DataLayout &DL = M->getDataLayout();
931 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
932
933 PMV.ValueType = PMV.IntValueType = ValueType;
934 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
935 PMV.IntValueType =
936 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
937
938 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
939 : ValueType;
940 if (PMV.ValueType == PMV.WordType) {
941 PMV.AlignedAddr = Addr;
942 PMV.AlignedAddrAlignment = AddrAlign;
943 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
944 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
945 return PMV;
946 }
947
948 PMV.AlignedAddrAlignment = Align(MinWordSize);
949
950 assert(ValueSize < MinWordSize);
951
952 PointerType *PtrTy = cast<PointerType>(Addr->getType());
953 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
954 Value *PtrLSB;
955
956 if (AddrAlign < MinWordSize) {
957 PMV.AlignedAddr = Builder.CreateIntrinsic(
958 Intrinsic::ptrmask, {PtrTy, IntTy},
959 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
960 nullptr, "AlignedAddr");
961
962 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
963 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
964 } else {
965 // If the alignment is high enough, the LSB are known 0.
966 PMV.AlignedAddr = Addr;
967 PtrLSB = ConstantInt::getNullValue(IntTy);
968 }
969
970 if (DL.isLittleEndian()) {
971 // turn bytes into bits
972 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
973 } else {
974 // turn bytes into bits, and count from the other side.
975 PMV.ShiftAmt = Builder.CreateShl(
976 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
977 }
978
979 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
980 PMV.Mask = Builder.CreateShl(
981 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
982 "Mask");
983
984 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
985
986 return PMV;
987}
988
989static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
990 const PartwordMaskValues &PMV) {
991 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
992 if (PMV.WordType == PMV.ValueType)
993 return WideWord;
994
995 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
996 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
997 return Builder.CreateBitCast(Trunc, PMV.ValueType);
998}
999
1000static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
1001 Value *Updated, const PartwordMaskValues &PMV) {
1002 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
1003 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
1004 if (PMV.WordType == PMV.ValueType)
1005 return Updated;
1006
1007 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
1008
1009 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
1010 Value *Shift =
1011 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
1012 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
1013 Value *Or = Builder.CreateOr(And, Shift, "inserted");
1014 return Or;
1015}
1016
1017/// Emit IR to implement a masked version of a given atomicrmw
1018/// operation. (That is, only the bits under the Mask should be
1019/// affected by the operation)
///
/// \param Builder      Builder used to emit the replacement arithmetic.
/// \param Loaded       The word-sized value the operation is applied to.
/// \param Shifted_Inc  The operand already shifted into the value's position
///                     inside the word (consumed by xchg/add/sub/nand).
/// \param Inc          The unshifted operand (consumed by the ops that first
///                     extract the narrow value from \p Loaded).
/// \param PMV          Shift amount and masks for the partword location.
/// \returns the new word-sized value.
// NOTE(review): the line carrying the return type, the function name and the
// leading `Op` parameter is not visible in this listing (the embedded
// numbering jumps from 1019 to 1021) - confirm against the original file.
1021 IRBuilderBase &Builder, Value *Loaded,
1022 Value *Shifted_Inc, Value *Inc,
1023 const PartwordMaskValues &PMV) {
1024 // TODO: update to use
1025 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
1026 // to merge bits from two values without requiring PMV.Inv_Mask.
1027 switch (Op) {
1028 case AtomicRMWInst::Xchg: {
 // Exchange: clear the slot in the loaded word and OR in the shifted operand.
1029 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1030 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
1031 return FinalVal;
1032 }
1033 case AtomicRMWInst::Or:
1034 case AtomicRMWInst::Xor:
1035 case AtomicRMWInst::And:
1036 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
1037 case AtomicRMWInst::Add:
1038 case AtomicRMWInst::Sub:
1039 case AtomicRMWInst::Nand: {
1040 // The other arithmetic ops need to be masked into place.
 // The op is computed on the whole word, then only the bits under PMV.Mask
 // are kept and merged with the untouched bits of the loaded word.
1041 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
1042 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
1043 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1044 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
1045 return FinalVal;
1046 }
1047 case AtomicRMWInst::Max:
1048 case AtomicRMWInst::Min:
 // NOTE(review): the embedded numbering jumps from 1048 to 1063 here, so
 // further case labels (and the opening brace matching the `}` below) are
 // presumably not visible in this listing - confirm against the original.
1063 // Finally, other ops will operate on the full value, so truncate down to
1064 // the original size, and expand out again after doing the
1065 // operation. Bitcasts will be inserted for FP values.
1066 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
1067 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
1068 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
1069 return FinalVal;
1070 }
1071 default:
1072 llvm_unreachable("Unknown atomic op");
1073 }
1074}
1075
1076/// Expand a sub-word atomicrmw operation into an appropriate
1077/// word-sized operation.
1078///
1079/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
1080/// way as a typical atomicrmw expansion. The only difference here is
1081/// that the operation inside of the loop may operate upon only a
1082/// part of the value.
///
/// \param AI             The atomicrmw to expand; it is erased on return.
/// \param ExpansionKind  Selects the loop flavour: CmpXChg uses
///                       insertRMWCmpXchgLoop, anything else must be LLSC and
///                       uses insertRMWLLSCLoop.
1083void AtomicExpandImpl::expandPartwordAtomicRMW(
1084 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
1085 // Widen And/Or/Xor and give the target another chance at expanding it.
 // NOTE(review): the embedded numbering jumps from 1085 to 1089, so the
 // declaration of `Op` and the condition guarding this early return are
 // presumably not visible in this listing - confirm against the original.
1089 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
1090 return;
1091 }
1092 AtomicOrdering MemOpOrder = AI->getOrdering();
1093 SyncScope::ID SSID = AI->getSyncScopeID();
1094
1095 ReplacementIRBuilder Builder(AI, *DL);
1096
 // Compute the aligned word address plus the shift/mask values locating the
 // narrow value inside that word.
1097 PartwordMaskValues PMV =
1098 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1099 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1100
1101 Value *ValOperand_Shifted = nullptr;
 // NOTE(review): the embedded numbering jumps from 1101 to 1104; the
 // condition opening the scope closed by the `}` below is not visible here.
1104 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
1105 ValOperand_Shifted =
1106 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
1107 "ValOperand_Shifted");
1108 }
1109
 // Loop body callback: given the currently loaded word, produce the word to
 // store back. Both the shifted and the original operand are handed over.
1110 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1111 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
1112 AI->getValOperand(), PMV);
1113 };
1114
1115 Value *OldResult;
1116 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1117 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1118 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
1119 AI->isVolatile(), PerformPartwordOp,
 // NOTE(review): the remaining arguments of this call (listing line 1120)
 // are not visible in this listing.
1121 } else {
1122 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1123 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1124 PMV.AlignedAddrAlignment, MemOpOrder,
1125 PerformPartwordOp);
1126 }
1127
 // The loop yields the old word; narrow it back to the original value type
 // before replacing the uses of the original instruction.
1128 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1129 AI->replaceAllUsesWith(FinalOldResult);
1130 AI->eraseFromParent();
1131}
1132
1133// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
//
// The operand is zero-extended and shifted into the value's position in the
// containing word. For `and`, the bits outside the value are additionally set
// (by OR-ing in PMV.Inv_Mask) so that the wide `and` leaves them unchanged.
// The original instruction is erased and the new, wider atomicrmw is returned;
// users of the original see the value extracted from the wide result.
1134AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1135 ReplacementIRBuilder Builder(AI, *DL);
 // NOTE(review): the embedded numbering skips 1136 and 1138, so the
 // declaration of `Op` and the first line of the assertion below are not
 // visible in this listing - confirm against the original file.
1137
1139 Op == AtomicRMWInst::And) &&
1140 "Unable to widen operation");
1141
1142 PartwordMaskValues PMV =
1143 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1144 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1145
1146 Value *ValOperand_Shifted =
1147 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1148 PMV.ShiftAmt, "ValOperand_Shifted");
1149
1150 Value *NewOperand;
1151
1152 if (Op == AtomicRMWInst::And)
1153 NewOperand =
1154 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1155 else
1156 NewOperand = ValOperand_Shifted;
1157
1158 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1159 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1160 AI->getOrdering(), AI->getSyncScopeID());
1161
 // Carry over volatility and metadata from the instruction being replaced.
1162 NewAI->setVolatile(AI->isVolatile());
1163 copyMetadataForAtomic(*NewAI, *AI);
1164
1165 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1166 AI->replaceAllUsesWith(FinalOldResult);
1167 AI->eraseFromParent();
1168 return NewAI;
1169}
1170
/// Expand the sub-word cmpxchg \p CI into a cmpxchg on the containing word
/// (of PMV.WordType), splitting the block and adding a retry path as laid out
/// in the comment below. \p CI is erased; the function always returns true.
1171bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1172 // The basic idea here is that we're expanding a cmpxchg of a
1173 // smaller memory size up to a word-sized cmpxchg. To do this, we
1174 // need to add a retry-loop for strong cmpxchg, so that
1175 // modifications to other parts of the word don't cause a spurious
1176 // failure.
1177
1178 // This generates code like the following:
1179 // [[Setup mask values PMV.*]]
1180 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1181 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1182 // %InitLoaded = load i32* %addr
1183 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1184 // br partword.cmpxchg.loop
1185 // partword.cmpxchg.loop:
1186 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1187 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1188 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1189 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1190 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1191 // i32 %FullWord_NewVal success_ordering failure_ordering
1192 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1193 // %Success = extractvalue { i32, i1 } %NewCI, 1
1194 // br i1 %Success, label %partword.cmpxchg.end,
1195 // label %partword.cmpxchg.failure
1196 // partword.cmpxchg.failure:
1197 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1198 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1199 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1200 // label %partword.cmpxchg.end
1201 // partword.cmpxchg.end:
1202 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1203 // %FinalOldVal = trunc i32 %tmp1 to i8
1204 // %tmp2 = insertvalue { i8, i1 } poison, i8 %FinalOldVal, 0
1205 // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
1206
1207 Value *Addr = CI->getPointerOperand();
1208 Value *Cmp = CI->getCompareOperand();
1209 Value *NewVal = CI->getNewValOperand();
1210
1211 BasicBlock *BB = CI->getParent();
1212 Function *F = BB->getParent();
1213 ReplacementIRBuilder Builder(CI, *DL);
1214 LLVMContext &Ctx = Builder.getContext();
1215
 // Split so that CI starts the end block, then create the failure and loop
 // blocks in front of it (layout order: BB, loop, failure, end).
1216 BasicBlock *EndBB =
1217 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1218 auto FailureBB =
1219 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1220 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1221
1222 // The split call above "helpfully" added a branch at the end of BB
1223 // (to the wrong place).
1224 std::prev(BB->end())->eraseFromParent();
1225 Builder.SetInsertPoint(BB);
1226
1227 PartwordMaskValues PMV =
1228 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1229 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1230
1231 // Shift the incoming values over, into the right location in the word.
1232 Value *NewVal_Shifted =
1233 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1234 Value *Cmp_Shifted =
1235 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1236
1237 // Load the entire current word, and mask into place the expected and new
1238 // values
1239 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1240 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1241 Builder.CreateBr(LoopBB);
1242
1243 // partword.cmpxchg.loop:
1244 Builder.SetInsertPoint(LoopBB);
1245 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1246 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1247
1248 // The initial load must be atomic with the same synchronization scope
1249 // to avoid a data race with concurrent stores. If the instruction being
1250 // emulated is volatile, issue a volatile load.
1251 // addIncoming is done first so that any replaceAllUsesWith calls during
1252 // normalization correctly update the PHI incoming value.
1253 InitLoaded->setVolatile(CI->isVolatile());
 // NOTE(review): the embedded numbering jumps from 1253 to 1255; the
 // condition opening the scope closed by the `}` below is not visible here.
1255 InitLoaded->setAtomic(AtomicOrdering::Monotonic, CI->getSyncScopeID());
1256 // The newly created load might need to be lowered further. Because it is
1257 // created in the same block as the original cmpxchg, the AtomicExpand loop
1258 // will not process it again.
1259 processAtomicInstr(InitLoaded);
1260 }
1261
1262 // Mask/Or the expected and new values into place in the loaded word.
1263 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1264 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1265 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1266 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
 // NOTE(review): the trailing arguments of this call (listing line 1267)
 // are not visible in this listing.
1268 NewCI->setVolatile(CI->isVolatile());
1269 // When we're building a strong cmpxchg, we need a loop, so you
1270 // might think we could use a weak cmpxchg inside. But, using strong
1271 // allows the below comparison for ShouldContinue, and we're
1272 // expecting the underlying cmpxchg to be a machine instruction,
1273 // which is strong anyways.
1274 NewCI->setWeak(CI->isWeak());
1275
1276 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1277 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1278
 // A weak cmpxchg goes straight to the end block; only the strong form
 // branches to the failure block on an unsuccessful exchange.
1279 if (CI->isWeak())
1280 Builder.CreateBr(EndBB);
1281 else
1282 Builder.CreateCondBr(Success, EndBB, FailureBB);
1283
1284 // partword.cmpxchg.failure:
1285 Builder.SetInsertPoint(FailureBB);
1286 // Upon failure, verify that the masked-out part of the loaded value
1287 // has been modified. If it didn't, abort the cmpxchg, since the
1288 // masked-in part must've.
1289 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1290 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1291 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1292
1293 // Add the second value to the phi from above
1294 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1295
1296 // partword.cmpxchg.end:
1297 Builder.SetInsertPoint(CI);
1298
 // Rebuild the { value, success } pair in terms of the original narrow type.
1299 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1300 Value *Res = PoisonValue::get(CI->getType());
1301 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1302 Res = Builder.CreateInsertValue(Res, Success, 1);
1303
1304 CI->replaceAllUsesWith(Res);
1305 CI->eraseFromParent();
1306 return true;
1307}
1308
1309void AtomicExpandImpl::expandAtomicOpToLLSC(
1310 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1311 AtomicOrdering MemOpOrder,
1312 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1313 ReplacementIRBuilder Builder(I, *DL);
1314 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1315 MemOpOrder, PerformOp);
1316
1317 I->replaceAllUsesWith(Loaded);
1318 I->eraseFromParent();
1319}
1320
1321void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1322 ReplacementIRBuilder Builder(AI, *DL);
1323
1324 PartwordMaskValues PMV =
1325 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1326 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1327
1328 // The value operand must be sign-extended for signed min/max so that the
1329 // target's signed comparison instructions can be used. Otherwise, just
1330 // zero-ext.
1331 Instruction::CastOps CastOp = Instruction::ZExt;
1332 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1333 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1334 CastOp = Instruction::SExt;
1335
1336 Value *ValOperand_Shifted = Builder.CreateShl(
1337 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1338 PMV.ShiftAmt, "ValOperand_Shifted");
1339 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1340 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1341 AI->getOrdering());
1342 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1343 AI->replaceAllUsesWith(FinalOldResult);
1344 AI->eraseFromParent();
1345}
1346
/// Lower the partword cmpxchg \p CI through a target hook that receives the
/// aligned address, the shifted compare and new values, and the mask. The
/// { old value, success } result is rebuilt from the word-sized value the
/// hook yields, and \p CI is erased.
1347void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1348 AtomicCmpXchgInst *CI) {
1349 ReplacementIRBuilder Builder(CI, *DL);
1350
1351 PartwordMaskValues PMV = createMaskInstrs(
1352 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1353 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1354
 // Zero-extend both operands to the word type and shift them into the
 // value's position within the word.
1355 Value *CmpVal_Shifted = Builder.CreateShl(
1356 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1357 "CmpVal_Shifted");
1358 Value *NewVal_Shifted = Builder.CreateShl(
1359 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1360 "NewVal_Shifted");
 // NOTE(review): the line declaring `OldVal` and naming the target hook
 // being called (listing line 1361) is not visible in this listing.
1362 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1363 CI->getMergedOrdering());
1364 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1365 Value *Res = PoisonValue::get(CI->getType());
1366 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
 // Success is derived by comparing the masked old word against the shifted
 // expected value.
1367 Value *Success = Builder.CreateICmpEQ(
1368 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1369 Res = Builder.CreateInsertValue(Res, Success, 1);
1370
1371 CI->replaceAllUsesWith(Res);
1372 CI->eraseFromParent();
1373}
1374
/// Emit a load-linked/store-conditional loop at the builder's current
/// insertion point.
///
/// \param Builder     Positioned where the loop is to be inserted; on return
///                    it points at the start of the block following the loop.
/// \param ResultTy    Type loaded by the load-linked.
/// \param Addr        Address operated on.
/// \param AddrAlign   Alignment of \p Addr; asserted to be at least the store
///                    size of \p ResultTy.
/// \param MemOpOrder  Ordering passed to the load-linked and
///                    store-conditional hooks.
/// \param PerformOp   Computes the value to store from the loaded value.
/// \returns the value produced by the load-linked.
1375Value *AtomicExpandImpl::insertRMWLLSCLoop(
1376 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1377 AtomicOrdering MemOpOrder,
1378 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1379 LLVMContext &Ctx = Builder.getContext();
1380 BasicBlock *BB = Builder.GetInsertBlock();
1381 Function *F = BB->getParent();
1382
1383 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1384 "Expected at least natural alignment at this point.");
1385
1386 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1387 //
1388 // The standard expansion we produce is:
1389 // [...]
1390 // atomicrmw.start:
1391 // %loaded = @load.linked(%addr)
1392 // %new = some_op iN %loaded, %incr
1393 // %stored = @store_conditional(%new, %addr)
1394 // %try_again = icmp i32 ne %stored, 0
1395 // br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
1396 // atomicrmw.end:
1397 // [...]
1398 BasicBlock *ExitBB =
1399 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1400 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1401
1402 // The split call above "helpfully" added a branch at the end of BB (to the
1403 // wrong place).
1404 std::prev(BB->end())->eraseFromParent();
1405 Builder.SetInsertPoint(BB);
1406 Builder.CreateBr(LoopBB);
1407
1408 // Start the main loop block now that we've taken care of the preliminaries.
1409 Builder.SetInsertPoint(LoopBB);
1410 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1411
1412 Value *NewVal = PerformOp(Builder, Loaded);
1413
 // The store-conditional result is compared against zero; a non-zero result
 // sends control back to the top of the loop.
1414 Value *StoreSuccess =
1415 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1416 Value *TryAgain = Builder.CreateICmpNE(
1417 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1418
1419 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1420
1421 // Atomic RMW expands to a Load-linked / Store-Conditional loop. Because it
1422 // is hard to predict precise branch weights, we mark the branch as "unknown"
1423 // (50/50) to prevent misleading optimizations.
 // NOTE(review): the statement that applies this marking to CondBr (listing
 // line 1424) is not visible in this listing.
1425
1426 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1427 return Loaded;
1428}
1429
1430/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1431/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1432/// IR. As a migration step, we convert back to what use to be the standard
1433/// way to represent a pointer cmpxchg so that we can update backends one by
1434/// one.
1435AtomicCmpXchgInst *
1436AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1437 auto *M = CI->getModule();
1438 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1439 M->getDataLayout());
1440
1441 ReplacementIRBuilder Builder(CI, *DL);
1442
1443 Value *Addr = CI->getPointerOperand();
1444
1445 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1446 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1447
1448 auto *NewCI = Builder.CreateAtomicCmpXchg(
1449 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1450 CI->getFailureOrdering(), CI->getSyncScopeID());
1451 NewCI->setVolatile(CI->isVolatile());
1452 NewCI->setWeak(CI->isWeak());
1453 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1454
1455 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1456 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1457
1458 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1459
1460 Value *Res = PoisonValue::get(CI->getType());
1461 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1462 Res = Builder.CreateInsertValue(Res, Succ, 1);
1463
1464 CI->replaceAllUsesWith(Res);
1465 CI->eraseFromParent();
1466 return NewCI;
1467}
1468
/// Expand the cmpxchg \p CI into explicit control flow built around the
/// target's load-linked / store-conditional hooks, following the block layout
/// sketched in the large comment below. The access is performed on
/// PMV.WordType at PMV.AlignedAddr, so a value narrower than the minimum
/// cmpxchg width is extracted from / inserted into the containing word.
/// \p CI is erased; the function always returns true.
1469bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1470 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1471 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1472 Value *Addr = CI->getPointerOperand();
1473 BasicBlock *BB = CI->getParent();
1474 Function *F = BB->getParent();
1475 LLVMContext &Ctx = F->getContext();
1476 // If shouldInsertFencesForAtomic() returns true, then the target does not
1477 // want to deal with memory orders, and emitLeading/TrailingFence should take
1478 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1479 // should preserve the ordering.
1480 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1481 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1482 ? AtomicOrdering::Monotonic
1483 : CI->getMergedOrdering();
1484
1485 // In implementations which use a barrier to achieve release semantics, we can
1486 // delay emitting this barrier until we know a store is actually going to be
1487 // attempted. The cost of this delay is that we need 2 copies of the block
1488 // emitting the load-linked, affecting code size.
1489 //
1490 // Ideally, this logic would be unconditional except for the minsize check
1491 // since in other cases the extra blocks naturally collapse down to the
1492 // minimal loop. Unfortunately, this puts too much stress on later
1493 // optimisations so we avoid emitting the extra logic in those cases too.
1494 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1495 SuccessOrder != AtomicOrdering::Monotonic &&
1496 SuccessOrder != AtomicOrdering::Acquire &&
1497 !F->hasMinSize();
1498
1499 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1500 // do it even on minsize.
1501 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1502
1503 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1504 //
1505 // The full expansion we produce is:
1506 // [...]
1507 // %aligned.addr = ...
1508 // cmpxchg.start:
1509 // %unreleasedload = @load.linked(%aligned.addr)
1510 // %unreleasedload.extract = extract value from %unreleasedload
1511 // %should_store = icmp eq %unreleasedload.extract, %desired
1512 // br i1 %should_store, label %cmpxchg.fencedstore,
1513 // label %cmpxchg.nostore
1514 // cmpxchg.fencedstore:
1515 // fence?
1516 // br label cmpxchg.trystore
1517 // cmpxchg.trystore:
1518 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
1519 // [%releasedload, %cmpxchg.releasedload]
1520 // %updated.new = insert %new into %loaded.trystore
1521 // %stored = @store_conditional(%updated.new, %aligned.addr)
1522 // %success = icmp eq i32 %stored, 0
1523 // br i1 %success, label %cmpxchg.success,
1524 // label %cmpxchg.releasedload/%cmpxchg.failure
1525 // cmpxchg.releasedload:
1526 // %releasedload = @load.linked(%aligned.addr)
1527 // %releasedload.extract = extract value from %releasedload
1528 // %should_store = icmp eq %releasedload.extract, %desired
1529 // br i1 %should_store, label %cmpxchg.trystore,
1530 // label %cmpxchg.failure
1531 // cmpxchg.success:
1532 // fence?
1533 // br label %cmpxchg.end
1534 // cmpxchg.nostore:
1535 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1536 // [%releasedload,
1537 // %cmpxchg.releasedload/%cmpxchg.trystore]
1538 // @load_linked_fail_balance()?
1539 // br label %cmpxchg.failure
1540 // cmpxchg.failure:
1541 // fence?
1542 // br label %cmpxchg.end
1543 // cmpxchg.end:
1544 // %loaded.exit = phi [%loaded.failure, %cmpxchg.failure],
1545 // [%loaded.trystore, %cmpxchg.success]
1546 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1547 // %loaded = extract value from %loaded.exit
1548 // %restmp = insertvalue { iN, i1 } poison, iN %loaded, 0
1549 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1550 // [...]
 // Each block is created immediately before the previously created one, so
 // the final layout is: start, fencedstore, trystore, releasedload, success,
 // nostore, failure, end.
1551 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1552 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1553 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1554 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1555 auto ReleasedLoadBB =
1556 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1557 auto TryStoreBB =
1558 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1559 auto ReleasingStoreBB =
1560 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1561 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1562
1563 ReplacementIRBuilder Builder(CI, *DL);
1564
1565 // The split call above "helpfully" added a branch at the end of BB (to the
1566 // wrong place), but we might want a fence too. It's easiest to just remove
1567 // the branch entirely.
1568 std::prev(BB->end())->eraseFromParent();
1569 Builder.SetInsertPoint(BB);
1570 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1571 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1572
1573 PartwordMaskValues PMV =
1574 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1575 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1576 Builder.CreateBr(StartBB);
1577
1578 // Start the main loop block now that we've taken care of the preliminaries.
1579 Builder.SetInsertPoint(StartBB);
1580 Value *UnreleasedLoad =
1581 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1582 Value *UnreleasedLoadExtract =
1583 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1584 Value *ShouldStore = Builder.CreateICmpEQ(
1585 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1586
1587 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1588 // jump straight past that fence instruction (if it exists).
1589 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1590 MDBuilder(F->getContext()).createLikelyBranchWeights());
1591
1592 Builder.SetInsertPoint(ReleasingStoreBB);
1593 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1594 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1595 Builder.CreateBr(TryStoreBB);
1596
1597 Builder.SetInsertPoint(TryStoreBB);
1598 PHINode *LoadedTryStore =
1599 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1600 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1601 Value *NewValueInsert =
1602 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1603 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1604 PMV.AlignedAddr, MemOpOrder);
1605 StoreSuccess = Builder.CreateICmpEQ(
1606 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
 // On a failed store-conditional a weak cmpxchg gives up (failure block),
 // while a strong one retries from the released-load block when it exists
 // and from the start block otherwise.
1607 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1608 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1609 CI->isWeak() ? FailureBB : RetryBB,
1610 MDBuilder(F->getContext()).createLikelyBranchWeights());
1611
1612 Builder.SetInsertPoint(ReleasedLoadBB);
1613 Value *SecondLoad;
1614 if (HasReleasedLoadBB) {
1615 SecondLoad =
1616 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1617 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1618 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1619 CI->getCompareOperand(), "should_store");
1620
1621 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1622 // jump straight past that fence instruction (if it exists).
1623 Builder.CreateCondBr(
1624 ShouldStore, TryStoreBB, NoStoreBB,
1625 MDBuilder(F->getContext()).createLikelyBranchWeights());
1626 // Update PHI node in TryStoreBB.
1627 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1628 } else
 // No branch targets the released-load block in this configuration, so it
 // is simply terminated with unreachable.
1629 Builder.CreateUnreachable();
1630
1631 // Make sure later instructions don't get reordered with a fence if
1632 // necessary.
1633 Builder.SetInsertPoint(SuccessBB);
1634 if (ShouldInsertFencesForAtomic ||
 // NOTE(review): the second half of this condition (listing line 1635) is
 // not visible in this listing - confirm against the original file.
1636 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1637 Builder.CreateBr(ExitBB);
1638
1639 Builder.SetInsertPoint(NoStoreBB);
1640 PHINode *LoadedNoStore =
1641 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1642 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1643 if (HasReleasedLoadBB)
1644 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1645
1646 // In the failing case, where we don't execute the store-conditional, the
1647 // target might want to balance out the load-linked with a dedicated
1648 // instruction (e.g., on ARM, clearing the exclusive monitor).
 // NOTE(review): the call the comment above describes (listing line 1649) is
 // not visible in this listing.
1650 Builder.CreateBr(FailureBB);
1651
1652 Builder.SetInsertPoint(FailureBB);
1653 PHINode *LoadedFailure =
1654 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1655 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1656 if (CI->isWeak())
1657 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1658 if (ShouldInsertFencesForAtomic)
1659 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1660 Builder.CreateBr(ExitBB);
1661
1662 // Finally, we have control-flow based knowledge of whether the cmpxchg
1663 // succeeded or not. We expose this to later passes by converting any
1664 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1665 // PHI.
1666 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1667 PHINode *LoadedExit =
1668 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1669 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1670 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1671 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1672 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1673 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1674
1675 // This is the "exit value" from the cmpxchg expansion. It may be of
1676 // a type wider than the one in the cmpxchg instruction.
1677 Value *LoadedFull = LoadedExit;
1678
1679 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1680 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1681
1682 // Look for any users of the cmpxchg that are just comparing the loaded value
1683 // against the desired one, and replace them with the CFG-derived version.
 // NOTE(review): the declaration of `PrunedInsts` (listing line 1684) is not
 // visible in this listing.
1685 for (auto *User : CI->users()) {
1686 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1687 if (!EV)
1688 continue;
1689
1690 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1691 "weird extraction from { iN, i1 }");
1692
1693 if (EV->getIndices()[0] == 0)
1694 EV->replaceAllUsesWith(Loaded);
1695 else
 // NOTE(review): the statement belonging to this `else` (listing line 1696)
 // is not visible in this listing.
1697
1698 PrunedInsts.push_back(EV);
1699 }
1700
1701 // We can remove the instructions now we're no longer iterating through them.
1702 for (auto *EV : PrunedInsts)
1703 EV->eraseFromParent();
1704
1705 if (!CI->use_empty()) {
1706 // Some use of the full struct return that we don't understand has happened,
1707 // so we've got to reconstruct it properly.
1708 Value *Res;
1709 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1710 Res = Builder.CreateInsertValue(Res, Success, 1);
1711
1712 CI->replaceAllUsesWith(Res);
1713 }
1714
1715 CI->eraseFromParent();
1716 return true;
1717}
1718
1719bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1720 if (RMWI->isVolatile())
1721 return false;
1722 // TODO: Add floating point support.
1723 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1724 if (!C)
1725 return false;
1726
1727 switch (RMWI->getOperation()) {
1728 case AtomicRMWInst::Add:
1729 case AtomicRMWInst::Sub:
1730 case AtomicRMWInst::Or:
1731 case AtomicRMWInst::Xor:
1732 return C->isZero();
1733 case AtomicRMWInst::And:
1734 return C->isMinusOne();
1735 case AtomicRMWInst::Min:
1736 return C->isMaxValue(true);
1737 case AtomicRMWInst::Max:
1738 return C->isMinValue(true);
1740 return C->isMaxValue(false);
1742 return C->isMinValue(false);
1743 default:
1744 return false;
1745 }
1746}
1747
1748bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1749 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1750 tryExpandAtomicLoad(ResultingLoad);
1751 return true;
1752 }
1753 return false;
1754}
1755
/// Emit a compare-exchange retry loop at Builder's current insertion point.
///
/// The current block is split at the insertion point into the original block
/// and "atomicrmw.end"; a new "atomicrmw.start" loop block is placed between
/// them. The original block receives an initial load of \p Addr, the loop
/// block receives a "loaded" PHI, the value computed by \p PerformOp, the
/// compare-exchange emitted by \p CreateCmpXchg, and a conditional branch that
/// leaves the loop when the exchange reports success.
///
/// \param ResultTy      type of the initial load and of the "loaded" PHI.
/// \param Addr          address operated on.
/// \param AddrAlign     alignment used for the initial load and forwarded to
///                      \p CreateCmpXchg.
/// \param MemOpOrder    ordering forwarded to \p CreateCmpXchg (Unordered is
///                      replaced by Monotonic).
/// \param SSID          sync scope for the initial load and the exchange.
/// \param IsVolatile    whether the emitted accesses are volatile.
/// \param PerformOp     callback producing the new value from the loaded one.
/// \param CreateCmpXchg callback emitting the exchange; it must set both of
///                      its Success and NewLoaded out-parameters.
/// \param MetadataSrc   forwarded unchanged to \p CreateCmpXchg.
/// \returns the NewLoaded value produced by \p CreateCmpXchg. On return the
///          builder is positioned at the start of the exit block.
1756Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1757 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1758 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
1759 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1760 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1761 LLVMContext &Ctx = Builder.getContext();
1762 BasicBlock *BB = Builder.GetInsertBlock();
1763 Function *F = BB->getParent();
1764
1765 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1766 //
1767 // The standard expansion we produce is:
1768 // [...]
1769 // %init_loaded = load atomic iN* %addr
1770 // br label %loop
1771 // loop:
1772 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1773 // %new = some_op iN %loaded, %incr
1774 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1775 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1776 // %success = extractvalue { iN, i1 } %pair, 1
1777 // br i1 %success, label %atomicrmw.end, label %loop
1778 // atomicrmw.end:
1779 // [...]
1780 BasicBlock *ExitBB =
1781 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
// The loop block is inserted before ExitBB in the function's block list.
1782 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1783
1784 // The split call above "helpfully" added a branch at the end of BB (to the
1785 // wrong place), but we want a load. It's easiest to just remove
1786 // the branch entirely.
1787 std::prev(BB->end())->eraseFromParent();
1788 Builder.SetInsertPoint(BB);
1789 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1790 Builder.CreateBr(LoopBB);
1791
1792 // Start the main loop block now that we've taken care of the preliminaries.
1793 Builder.SetInsertPoint(LoopBB);
1794 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1795 Loaded->addIncoming(InitLoaded, BB);
1796
1797 // The initial load must be atomic with the same synchronization scope
1798 // to avoid a data race with concurrent stores. If the instruction being
1799 // emulated is volatile, issue a volatile load.
1800 // addIncoming is done first so that any replaceAllUsesWith calls during
1801 // normalization correctly update the PHI incoming value.
1802 InitLoaded->setVolatile(IsVolatile);
// NOTE(review): the closing brace after processAtomicInstr below has no
// matching opener in this listing. A guarding `if (...) {` line appears to
// have been lost at this point; restore it from the upstream file rather than
// guessing the condition.
1804 InitLoaded->setAtomic(AtomicOrdering::Monotonic, SSID);
1805 // The newly created load might need to be lowered further. Because it is
1806 // created in the same block as the atomicrmw, the AtomicExpand loop will
1807 // not process it again.
1808 processAtomicInstr(InitLoaded);
1809 }
1810
1811 Value *NewVal = PerformOp(Builder, Loaded);
1812
1813 Value *NewLoaded = nullptr;
1814 Value *Success = nullptr;
1815
// Unordered is never forwarded to the callback; Monotonic is used instead
// (presumably because cmpxchg does not accept Unordered; confirm in LangRef).
1816 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1817 MemOpOrder == AtomicOrdering::Unordered
1818 ? AtomicOrdering::Monotonic
1819 : MemOpOrder,
1820 SSID, IsVolatile, Success, NewLoaded, MetadataSrc);
1821 assert(Success && NewLoaded);
1822
// Back edge: on failure the value observed by the exchange feeds the next
// iteration.
1823 Loaded->addIncoming(NewLoaded, LoopBB);
1824
1825 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1826
1827 // Atomic RMW expands to a cmpxchg loop, Since precise branch weights
1828 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1829 // to prevent misleading optimizations.
// NOTE(review): the statement that the comment above describes is absent from
// this listing and CondBr is otherwise unused; a line appears to have been
// lost here. Verify against the upstream file.
1831
1832 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1833 return NewLoaded;
1834}
1835
1836bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1837 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1838 unsigned ValueSize = getAtomicOpSize(CI);
1839
1840 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1841 default:
1842 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1843 case TargetLoweringBase::AtomicExpansionKind::None:
1844 if (ValueSize < MinCASSize)
1845 return expandPartwordCmpXchg(CI);
1846 return false;
1847 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1848 return expandAtomicCmpXchg(CI);
1849 }
1850 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1851 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1852 return true;
1853 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1854 return lowerAtomicCmpXchgInst(CI);
1855 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1856 TLI->emitExpandAtomicCmpXchg(CI);
1857 return true;
1858 }
1859 }
1860}
1861
1862bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1863 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1864 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1865 Builder.setIsFPConstrained(
1866 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1867
1868 // FIXME: If FP exceptions are observable, we should force them off for the
1869 // loop for the FP atomics.
1870 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1871 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1872 AI->getOrdering(), AI->getSyncScopeID(), AI->isVolatile(),
1873 [&](IRBuilderBase &Builder, Value *Loaded) {
1874 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1875 AI->getValOperand());
1876 },
1877 CreateCmpXchg, /*MetadataSrc=*/AI);
1878
1879 AI->replaceAllUsesWith(Loaded);
1880 AI->eraseFromParent();
1881 return true;
1882}
1883
1884// In order to use one of the sized library calls such as
1885// __atomic_fetch_add_4, the alignment must be sufficient, the size
1886// must be one of the potentially-specialized sizes, and the value
1887// type must actually exist in C on the target (otherwise, the
1888// function wouldn't actually be defined.)
1889static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1890 const DataLayout &DL) {
1891 // TODO: "LargestSize" is an approximation for "largest type that
1892 // you can express in C". It seems to be the case that int128 is
1893 // supported on all 64-bit platforms, otherwise only up to 64-bit
1894 // integers are supported. If we get this wrong, then we'll try to
1895 // call a sized libcall that doesn't actually exist. There should
1896 // really be some more reliable way in LLVM of determining integer
1897 // sizes which are valid in the target's C ABI...
1898 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1899 return Alignment >= Size &&
1900 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1901 Size <= LargestSize;
1902}
1903
1904void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1905 static const RTLIB::Libcall Libcalls[6] = {
1906 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1907 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1908 unsigned Size = getAtomicOpSize(I);
1909
1910 bool Expanded = expandAtomicOpToLibcall(
1911 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1912 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1913 if (!Expanded)
1914 handleUnsupportedAtomicSize(I, "atomic load");
1915}
1916
1917void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1918 static const RTLIB::Libcall Libcalls[6] = {
1919 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1920 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1921 unsigned Size = getAtomicOpSize(I);
1922
1923 bool Expanded = expandAtomicOpToLibcall(
1924 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1925 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1926 if (!Expanded)
1927 handleUnsupportedAtomicSize(I, "atomic store");
1928}
1929
1930void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
1931 const Twine &AtomicOpName,
1932 Instruction *DiagnosticInst) {
1933 static const RTLIB::Libcall Libcalls[6] = {
1934 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1935 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1936 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1937 unsigned Size = getAtomicOpSize(I);
1938
1939 bool Expanded = expandAtomicOpToLibcall(
1940 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1941 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1942 Libcalls);
1943 if (!Expanded)
1944 handleUnsupportedAtomicSize(I, AtomicOpName, DiagnosticInst);
1945}
1946
1948 static const RTLIB::Libcall LibcallsXchg[6] = {
1949 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1950 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1951 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1952 static const RTLIB::Libcall LibcallsAdd[6] = {
1953 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1954 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1955 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1956 static const RTLIB::Libcall LibcallsSub[6] = {
1957 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1958 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1959 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1960 static const RTLIB::Libcall LibcallsAnd[6] = {
1961 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1962 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1963 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1964 static const RTLIB::Libcall LibcallsOr[6] = {
1965 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1966 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1967 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1968 static const RTLIB::Libcall LibcallsXor[6] = {
1969 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1970 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1971 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1972 static const RTLIB::Libcall LibcallsNand[6] = {
1973 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1974 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1975 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1976
1977 switch (Op) {
1979 llvm_unreachable("Should not have BAD_BINOP.");
1981 return ArrayRef(LibcallsXchg);
1982 case AtomicRMWInst::Add:
1983 return ArrayRef(LibcallsAdd);
1984 case AtomicRMWInst::Sub:
1985 return ArrayRef(LibcallsSub);
1986 case AtomicRMWInst::And:
1987 return ArrayRef(LibcallsAnd);
1988 case AtomicRMWInst::Or:
1989 return ArrayRef(LibcallsOr);
1990 case AtomicRMWInst::Xor:
1991 return ArrayRef(LibcallsXor);
1993 return ArrayRef(LibcallsNand);
1994 case AtomicRMWInst::Max:
1995 case AtomicRMWInst::Min:
2010 // No atomic libcalls are available for these.
2011 return {};
2012 }
2013 llvm_unreachable("Unexpected AtomicRMW operation.");
2014}
2015
2016void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
2017 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
2018
2019 unsigned Size = getAtomicOpSize(I);
2020
2021 bool Success = false;
2022 if (!Libcalls.empty())
2023 Success = expandAtomicOpToLibcall(
2024 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
2025 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
2026
2027 // The expansion failed: either there were no libcalls at all for
2028 // the operation (min/max), or there were only size-specialized
2029 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
2030 // CAS libcall, via a CAS loop, instead.
2031 if (!Success) {
2032 expandAtomicRMWToCmpXchg(
2033 I, [this, I](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
2034 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
2035 SyncScope::ID SSID, bool IsVolatile, Value *&Success,
2036 Value *&NewLoaded, Instruction *MetadataSrc) {
2037 // Create the CAS instruction normally...
2038 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
2039 Addr, Loaded, NewVal, Alignment, MemOpOrder,
2041 Pair->setVolatile(IsVolatile);
2042 if (MetadataSrc)
2043 copyMetadataForAtomic(*Pair, *MetadataSrc);
2044
2045 Success = Builder.CreateExtractValue(Pair, 1, "success");
2046 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
2047
2048 // ...and then expand the CAS into a libcall.
2049 expandAtomicCASToLibcall(
2050 Pair,
2051 "atomicrmw " + AtomicRMWInst::getOperationName(I->getOperation()),
2052 MetadataSrc);
2053 });
2054 }
2055}
2056
2057// A helper routine for the above expandAtomic*ToLibcall functions.
2058//
2059// 'Libcalls' contains an array of enum values for the particular
2060// ATOMIC libcalls to be emitted. All of the other arguments besides
2061// 'I' are extracted from the Instruction subclass by the
2062// caller. Depending on the particular call, some will be null.
/// Replace atomic instruction \p I with a call to an __atomic_* runtime
/// function chosen from \p Libcalls.
///
/// \param I              instruction being replaced; erased on success.
/// \param Size           operation size in bytes (Size * 8 is used as a bit
///                       width below).
/// \param Alignment      alignment of the access, used to decide between a
///                       sized and a generic call.
/// \param PointerOperand address operated on.
/// \param ValueOperand   stored/desired value, or null when there is none.
/// \param CASExpected    expected value for compare-exchange, else null.
/// \param Ordering       ordering (success ordering for compare-exchange).
/// \param Ordering2      failure ordering; only read when \p CASExpected is
///                       non-null.
/// \param Libcalls       six entries: [0] generic, [1..5] the 1, 2, 4, 8 and
///                       16 byte variants.
/// \returns false when neither a sized nor a generic libcall is usable, or
///          when LibcallLowering reports the chosen libcall as unsupported;
///          true once \p I has been replaced and erased.
2063bool AtomicExpandImpl::expandAtomicOpToLibcall(
2064 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
2065 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
2066 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
2067 assert(Libcalls.size() == 6);
2068
2069 LLVMContext &Ctx = I->getContext();
2070 Module *M = I->getModule();
2071 const DataLayout &DL = M->getDataLayout();
// Builder inserts before I; AllocaBuilder inserts before the first
// instruction of the function's entry block.
2072 IRBuilder<> Builder(I);
2073 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
2074
2075 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
2076 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
2077
2078 if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
2079 Size == 16) {
2080 // x86_64 Windows passes i128 as an XMM vector; on return, it is in
2081 // XMM0, and as a parameter, it is passed indirectly. The generic lowering
2082 // rules handles this correctly if we pass it as a v2i64 rather than
2083 // i128. This is what Clang does in the frontend for such types as well
2084 // (see WinX86_64ABIInfo::classify in Clang).
2085 SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
2086 }
2087
// Every temporary alloca created below uses this one alignment.
2088 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
2089
2090 // TODO: the "order" argument type is "int", not int32. So
2091 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
2092 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
2093 Constant *OrderingVal =
2094 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
2095 Constant *Ordering2Val = nullptr;
2096 if (CASExpected) {
2097 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
2098 Ordering2Val =
2099 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
2100 }
2101 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
2102
2103 RTLIB::Libcall RTLibType;
2104 if (UseSizedLibcall) {
// No default label: canUseSizedAtomicCall only accepts the sizes listed here.
2105 switch (Size) {
2106 case 1:
2107 RTLibType = Libcalls[1];
2108 break;
2109 case 2:
2110 RTLibType = Libcalls[2];
2111 break;
2112 case 4:
2113 RTLibType = Libcalls[3];
2114 break;
2115 case 8:
2116 RTLibType = Libcalls[4];
2117 break;
2118 case 16:
2119 RTLibType = Libcalls[5];
2120 break;
2121 }
2122 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
2123 RTLibType = Libcalls[0];
2124 } else {
2125 // Can't use sized function, and there's no generic for this
2126 // operation, so give up.
2127 return false;
2128 }
2129
2130 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
2131 if (LibcallImpl == RTLIB::Unsupported) {
2132 // This target does not implement the requested atomic libcall so give up.
2133 return false;
2134 }
2135
2136 // Build up the function call. There's two kinds. First, the sized
2137 // variants. These calls are going to be one of the following (with
2138 // N=1,2,4,8,16):
2139 // iN __atomic_load_N(iN *ptr, int ordering)
2140 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2141 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2142 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2143 // int success_order, int failure_order)
2144 //
2145 // Note that these functions can be used for non-integer atomic
2146 // operations, the values just need to be bitcast to integers on the
2147 // way in and out.
2148 //
2149 // And, then, the generic variants. They look like the following:
2150 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2151 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2152 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2153 // int ordering)
2154 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2155 // void *desired, int success_order,
2156 // int failure_order)
2157 //
2158 // The different signatures are built up depending on the
2159 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2160 // variables.
2161
2162 AllocaInst *AllocaCASExpected = nullptr;
2163 AllocaInst *AllocaValue = nullptr;
2164 AllocaInst *AllocaResult = nullptr;
2165
2166 Type *ResultTy;
// NOTE(review): 'Args' is used below but its declaration is not present in
// this listing; a line appears to have been lost here. Restore it from the
// upstream file.
2168 AttributeList Attr;
2169
2170 // 'size' argument.
2171 if (!UseSizedLibcall) {
2172 // Note, getIntPtrType is assumed equivalent to size_t.
2173 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2174 }
2175
2176 // 'ptr' argument.
2177 // note: This assumes all address spaces share a common libfunc
2178 // implementation and that addresses are convertable. For systems without
2179 // that property, we'd need to extend this mechanism to support AS-specific
2180 // families of atomic intrinsics.
2181 Value *PtrVal = PointerOperand;
2182 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2183 Args.push_back(PtrVal);
2184
2185 // 'expected' argument, if present.
// Passed by address in both the sized and generic forms, so it is always
// spilled to a temporary alloca.
2186 if (CASExpected) {
2187 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2188 AllocaCASExpected->setAlignment(AllocaAlignment);
2189 Builder.CreateLifetimeStart(AllocaCASExpected);
2190 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2191 Args.push_back(AllocaCASExpected);
2192 }
2193
2194 // 'val' argument ('desired' for cas), if present.
// Sized calls take the value directly (cast to SizedIntTy); generic calls
// take a pointer to a temporary holding it.
2195 if (ValueOperand) {
2196 if (UseSizedLibcall) {
2197 Value *IntValue =
2198 Builder.CreateBitPreservingCastChain(DL, ValueOperand, SizedIntTy);
2199 Args.push_back(IntValue);
2200 } else {
2201 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2202 AllocaValue->setAlignment(AllocaAlignment);
2203 Builder.CreateLifetimeStart(AllocaValue);
2204 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2205 Args.push_back(AllocaValue);
2206 }
2207 }
2208
2209 // 'ret' argument.
// Only generic, non-CAS calls that produce a value return it through memory.
2210 if (!CASExpected && HasResult && !UseSizedLibcall) {
2211 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2212 AllocaResult->setAlignment(AllocaAlignment);
2213 Builder.CreateLifetimeStart(AllocaResult);
2214 Args.push_back(AllocaResult);
2215 }
2216
2217 // 'ordering' ('success_order' for cas) argument.
2218 Args.push_back(OrderingVal);
2219
2220 // 'failure_order' argument, if present.
2221 if (Ordering2Val)
2222 Args.push_back(Ordering2Val);
2223
2224 // Now, the return type.
2225 if (CASExpected) {
2226 ResultTy = Type::getInt1Ty(Ctx);
2227 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2228 } else if (HasResult && UseSizedLibcall)
2229 ResultTy = SizedIntTy;
2230 else
2231 ResultTy = Type::getVoidTy(Ctx);
2232
2233 // Done with setting up arguments and return types, create the call:
// NOTE(review): 'ArgTys' is used below but its declaration is not present in
// this listing; a line appears to have been lost here.
2235 for (Value *Arg : Args)
2236 ArgTys.push_back(Arg->getType());
2237 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2238 FunctionCallee LibcallFn = M->getOrInsertFunction(
// NOTE(review): the leading arguments of this call (presumably the callee name
// derived from LibcallImpl, and FnType) are missing from this listing; only
// the trailing 'Attr' survives. Restore from the upstream file.
2240 Attr);
2241 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2242 Call->setAttributes(Attr);
2243 Value *Result = Call;
2244
2245 // And then, extract the results...
2246 if (ValueOperand && !UseSizedLibcall)
2247 Builder.CreateLifetimeEnd(AllocaValue);
2248
2249 if (CASExpected) {
2250 // The final result from the CAS is {load of 'expected' alloca, bool result
2251 // from call}
2252 Type *FinalResultTy = I->getType();
2253 Value *V = PoisonValue::get(FinalResultTy);
2254 Value *ExpectedOut = Builder.CreateAlignedLoad(
2255 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2256 Builder.CreateLifetimeEnd(AllocaCASExpected);
2257 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2258 V = Builder.CreateInsertValue(V, Result, 1);
// NOTE(review): V is built but never used in this branch as listed; the
// statement that redirects I's uses to V (compare the non-CAS branch below)
// appears to have been lost here.
2260 } else if (HasResult) {
2261 Value *V;
2262 if (UseSizedLibcall) {
2263 // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2264 auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
2265 auto *VTy = dyn_cast<VectorType>(I->getType());
2266 if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2267 unsigned AS = PtrTy->getAddressSpace();
2268 Value *BC = Builder.CreateBitCast(
2269 Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
2270 V = Builder.CreateIntToPtr(BC, I->getType());
2271 } else
2272 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2273 } else {
2274 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2275 AllocaAlignment);
2276 Builder.CreateLifetimeEnd(AllocaResult);
2277 }
2278 I->replaceAllUsesWith(V);
2279 }
2280 I->eraseFromParent();
2281 return true;
2282}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static void writeUnsupportedAtomicSizeReason(const TargetLowering *TLI, Inst *I, raw_ostream &OS)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallString class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:477
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:479
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:867
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1957
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2671
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1923
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1216
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1358
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2664
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:176
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2227
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2266
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:175
LLVM_ABI Value * CreateBitPreservingCastChain(const DataLayout &DL, Value *V, Type *NewTy)
Create a chain of casts to convert V to NewTy, preserving the bit pattern of V.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2368
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1210
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2314
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2529
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2364
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:306
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2232
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1906
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1511
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2110
LLVMContext & getContext() const
Definition IRBuilder.h:177
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1570
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2222
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2543
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:181
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1942
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1592
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2237
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, bool Elementwise=false)
Definition IRBuilder.h:1970
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:348
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:287
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform an atomicrmw expansion in a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform an atomic store in a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
virtual bool shouldIssueAtomicLoadForAtomicEmulationLoop(void) const
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform an atomic load in a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform an atomicrmw whose result is only used by comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
LLVM_ABI bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
Definition InstrProf.h:147
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:435
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.