// Extracted from the LLVM 23.0.0git Doxygen rendering of AtomicExpandPass.cpp.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target specific instruction which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constant.h"
30#include "llvm/IR/Constants.h"
31#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/IRBuilder.h"
35#include "llvm/IR/Instruction.h"
37#include "llvm/IR/MDBuilder.h"
39#include "llvm/IR/Module.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/User.h"
43#include "llvm/IR/Value.h"
45#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
53#include <cassert>
54#include <cstdint>
55#include <iterator>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "atomic-expand"
60
61namespace {
62
63class AtomicExpandImpl {
64 const TargetLowering *TLI = nullptr;
65 const LibcallLoweringInfo *LibcallLowering = nullptr;
66 const DataLayout *DL = nullptr;
67
68private:
69 /// Callback type for emitting a cmpxchg instruction during RMW expansion.
70 /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
71 /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
72 /// MetadataSrc)
73 using CreateCmpXchgInstFun = function_ref<void(
76
77 void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
78 LLVMContext &Ctx = FailedInst.getContext();
79
80 // TODO: Do not use generic error type.
81 Ctx.emitError(&FailedInst, Msg);
82
83 if (!FailedInst.getType()->isVoidTy())
84 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
85 FailedInst.eraseFromParent();
86 }
87
88 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
89 bool tryInsertTrailingSeqCstFence(Instruction *AtomicI);
90 template <typename AtomicInst>
91 bool tryInsertFencesForAtomic(AtomicInst *AtomicI, bool OrderingRequiresFence,
92 AtomicOrdering NewOrdering);
93 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
94 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
95 bool tryExpandAtomicLoad(LoadInst *LI);
96 bool expandAtomicLoadToLL(LoadInst *LI);
97 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
98 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
99 bool tryExpandAtomicStore(StoreInst *SI);
100 void expandAtomicStoreToXChg(StoreInst *SI);
101 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
102 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
103 Value *
104 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
105 Align AddrAlign, AtomicOrdering MemOpOrder,
106 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
107 void expandAtomicOpToLLSC(
108 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
109 AtomicOrdering MemOpOrder,
110 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
111 void expandPartwordAtomicRMW(
113 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
114 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
115 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
116 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
117
118 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
119 static Value *insertRMWCmpXchgLoop(
120 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
121 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
122 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
123 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
124 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
125
126 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
127 bool isIdempotentRMW(AtomicRMWInst *RMWI);
128 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
129
130 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
131 Value *PointerOperand, Value *ValueOperand,
132 Value *CASExpected, AtomicOrdering Ordering,
133 AtomicOrdering Ordering2,
134 ArrayRef<RTLIB::Libcall> Libcalls);
135 void expandAtomicLoadToLibcall(LoadInst *LI);
136 void expandAtomicStoreToLibcall(StoreInst *LI);
137 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
138 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
139
140 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
141 CreateCmpXchgInstFun CreateCmpXchg);
142
143 bool processAtomicInstr(Instruction *I);
144
145public:
146 bool run(Function &F,
147 const LibcallLoweringModuleAnalysisResult &LibcallResult,
148 const TargetMachine *TM);
149};
150
151class AtomicExpandLegacy : public FunctionPass {
152public:
153 static char ID; // Pass identification, replacement for typeid
154
155 AtomicExpandLegacy() : FunctionPass(ID) {}
156
157 void getAnalysisUsage(AnalysisUsage &AU) const override {
160 }
161
162 bool runOnFunction(Function &F) override;
163};
164
165// IRBuilder to be used for replacement atomic instructions.
166struct ReplacementIRBuilder
167 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
168 MDNode *MMRAMD = nullptr;
169
170 // Preserves the DebugLoc from I, and preserves still valid metadata.
171 // Enable StrictFP builder mode when appropriate.
172 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
173 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
175 [this](Instruction *I) { addMMRAMD(I); })) {
176 SetInsertPoint(I);
177 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
178 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
179 this->setIsFPConstrained(true);
180
181 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
182 }
183
184 void addMMRAMD(Instruction *I) {
186 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
187 }
188};
189
190} // end anonymous namespace
191
192char AtomicExpandLegacy::ID = 0;
193
194char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
195
197 "Expand Atomic instructions", false, false)
200INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
201 "Expand Atomic instructions", false, false)
202
203// Helper functions to retrieve the size of atomic instructions.
204static unsigned getAtomicOpSize(LoadInst *LI) {
205 const DataLayout &DL = LI->getDataLayout();
206 return DL.getTypeStoreSize(LI->getType());
207}
208
209static unsigned getAtomicOpSize(StoreInst *SI) {
210 const DataLayout &DL = SI->getDataLayout();
211 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
212}
213
214static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
215 const DataLayout &DL = RMWI->getDataLayout();
216 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
217}
218
219static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
220 const DataLayout &DL = CASI->getDataLayout();
221 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
222}
223
224/// Copy metadata that's safe to preserve when widening atomics.
226 const Instruction &Source) {
228 Source.getAllMetadata(MD);
229 LLVMContext &Ctx = Dest.getContext();
230 MDBuilder MDB(Ctx);
231
232 for (auto [ID, N] : MD) {
233 switch (ID) {
234 case LLVMContext::MD_dbg:
235 case LLVMContext::MD_tbaa:
236 case LLVMContext::MD_tbaa_struct:
237 case LLVMContext::MD_alias_scope:
238 case LLVMContext::MD_noalias:
239 case LLVMContext::MD_noalias_addrspace:
240 case LLVMContext::MD_access_group:
241 case LLVMContext::MD_mmra:
242 Dest.setMetadata(ID, N);
243 break;
244 default:
245 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
246 Dest.setMetadata(ID, N);
247 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
248 Dest.setMetadata(ID, N);
249
250 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
251 // uses.
252 break;
253 }
254 }
255}
256
257// Determine if a particular atomic operation has a supported size,
258// and is of appropriate alignment, to be passed through for target
259// lowering. (Versus turning into a __atomic libcall)
260template <typename Inst>
261static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
262 unsigned Size = getAtomicOpSize(I);
263 Align Alignment = I->getAlign();
264 return Alignment >= Size &&
266}
267
/// Try to place a trailing seq_cst fence after \p AtomicI. Returns true if a
/// fence was emitted.
bool AtomicExpandImpl::tryInsertTrailingSeqCstFence(Instruction *AtomicI) {
  // NOTE(review): the guard condition that originally governed this early
  // return was lost in extraction -- presumably it bails out when no trailing
  // seq_cst fence is needed for this instruction; confirm against upstream.
    return false;

  IRBuilder Builder(AtomicI);
  // emitTrailingFence may return null when the target needs no fence; only
  // report a change when something was actually emitted.
  if (auto *TrailingFence = TLI->emitTrailingFence(
          Builder, AtomicI, AtomicOrdering::SequentiallyConsistent)) {
    TrailingFence->moveAfter(AtomicI);
    return true;
  }
  return false;
}
280
281template <typename AtomicInst>
282bool AtomicExpandImpl::tryInsertFencesForAtomic(AtomicInst *AtomicI,
283 bool OrderingRequiresFence,
284 AtomicOrdering NewOrdering) {
285 bool ShouldInsertFences = TLI->shouldInsertFencesForAtomic(AtomicI);
286 if (OrderingRequiresFence && ShouldInsertFences) {
287 AtomicOrdering FenceOrdering = AtomicI->getOrdering();
288 AtomicI->setOrdering(NewOrdering);
289 return bracketInstWithFences(AtomicI, FenceOrdering);
290 }
291 if (!ShouldInsertFences)
292 return tryInsertTrailingSeqCstFence(AtomicI);
293 return false;
294}
295
296bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
297 if (auto *LI = dyn_cast<LoadInst>(I)) {
298 if (!LI->isAtomic())
299 return false;
300
301 if (!atomicSizeSupported(TLI, LI)) {
302 expandAtomicLoadToLibcall(LI);
303 return true;
304 }
305
306 bool MadeChange = false;
307 if (TLI->shouldCastAtomicLoadInIR(LI) ==
308 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
309 LI = convertAtomicLoadToIntegerType(LI);
310 MadeChange = true;
311 }
312
313 MadeChange |= tryInsertFencesForAtomic(
314 LI, isAcquireOrStronger(LI->getOrdering()), AtomicOrdering::Monotonic);
315
316 MadeChange |= tryExpandAtomicLoad(LI);
317 return MadeChange;
318 }
319
320 if (auto *SI = dyn_cast<StoreInst>(I)) {
321 if (!SI->isAtomic())
322 return false;
323
324 if (!atomicSizeSupported(TLI, SI)) {
325 expandAtomicStoreToLibcall(SI);
326 return true;
327 }
328
329 bool MadeChange = false;
330 if (TLI->shouldCastAtomicStoreInIR(SI) ==
331 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
332 SI = convertAtomicStoreToIntegerType(SI);
333 MadeChange = true;
334 }
335
336 MadeChange |= tryInsertFencesForAtomic(
337 SI, isReleaseOrStronger(SI->getOrdering()), AtomicOrdering::Monotonic);
338
339 MadeChange |= tryExpandAtomicStore(SI);
340 return MadeChange;
341 }
342
343 if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
344 if (!atomicSizeSupported(TLI, RMWI)) {
345 expandAtomicRMWToLibcall(RMWI);
346 return true;
347 }
348
349 bool MadeChange = false;
350 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
351 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
352 RMWI = convertAtomicXchgToIntegerType(RMWI);
353 MadeChange = true;
354 }
355
356 MadeChange |= tryInsertFencesForAtomic(
357 RMWI,
358 isReleaseOrStronger(RMWI->getOrdering()) ||
359 isAcquireOrStronger(RMWI->getOrdering()),
361
362 // There are two different ways of expanding RMW instructions:
363 // - into a load if it is idempotent
364 // - into a Cmpxchg/LL-SC loop otherwise
365 // we try them in that order.
366 MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
367 tryExpandAtomicRMW(RMWI);
368 return MadeChange;
369 }
370
371 if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
372 if (!atomicSizeSupported(TLI, CASI)) {
373 expandAtomicCASToLibcall(CASI);
374 return true;
375 }
376
377 // TODO: when we're ready to make the change at the IR level, we can
378 // extend convertCmpXchgToInteger for floating point too.
379 bool MadeChange = false;
380 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
381 // TODO: add a TLI hook to control this so that each target can
382 // convert to lowering the original type one at a time.
383 CASI = convertCmpXchgToIntegerType(CASI);
384 MadeChange = true;
385 }
386
387 auto CmpXchgExpansion = TLI->shouldExpandAtomicCmpXchgInIR(CASI);
388 if (TLI->shouldInsertFencesForAtomic(CASI)) {
389 if (CmpXchgExpansion == TargetLoweringBase::AtomicExpansionKind::None &&
390 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
391 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
392 isAcquireOrStronger(CASI->getFailureOrdering()))) {
393 // If a compare and swap is lowered to LL/SC, we can do smarter fence
394 // insertion, with a stronger one on the success path than on the
395 // failure path. As a result, fence insertion is directly done by
396 // expandAtomicCmpXchg in that case.
397 AtomicOrdering FenceOrdering = CASI->getMergedOrdering();
398 AtomicOrdering CASOrdering =
400 CASI->setSuccessOrdering(CASOrdering);
401 CASI->setFailureOrdering(CASOrdering);
402 MadeChange |= bracketInstWithFences(CASI, FenceOrdering);
403 }
404 } else if (CmpXchgExpansion !=
405 TargetLoweringBase::AtomicExpansionKind::LLSC) {
406 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
407 MadeChange |= tryInsertTrailingSeqCstFence(CASI);
408 }
409
410 MadeChange |= tryExpandAtomicCmpXchg(CASI);
411 return MadeChange;
412 }
413
414 return false;
415}
416
417bool AtomicExpandImpl::run(
418 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
419 const TargetMachine *TM) {
420 const auto *Subtarget = TM->getSubtargetImpl(F);
421 if (!Subtarget->enableAtomicExpand())
422 return false;
423 TLI = Subtarget->getTargetLowering();
424 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
425 DL = &F.getDataLayout();
426
427 bool MadeChange = false;
428
429 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
430 BasicBlock *BB = &*BBI;
431
433
434 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
435 I = Next) {
436 Instruction &Inst = *I;
437 Next = std::next(I);
438
439 if (processAtomicInstr(&Inst)) {
440 MadeChange = true;
441
442 // New blocks may have been inserted.
443 BBE = F.end();
444 }
445 }
446 }
447
448 return MadeChange;
449}
450
451bool AtomicExpandLegacy::runOnFunction(Function &F) {
452
453 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
454 if (!TPC)
455 return false;
456 auto *TM = &TPC->getTM<TargetMachine>();
457
458 const LibcallLoweringModuleAnalysisResult &LibcallResult =
459 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
460 AtomicExpandImpl AE;
461 return AE.run(F, LibcallResult, TM);
462}
463
465 return new AtomicExpandLegacy();
466}
467
470 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
471
472 const LibcallLoweringModuleAnalysisResult *LibcallResult =
473 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
474
475 if (!LibcallResult) {
476 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
477 "' analysis required");
478 return PreservedAnalyses::all();
479 }
480
481 AtomicExpandImpl AE;
482
483 bool Changed = AE.run(F, *LibcallResult, TM);
484 if (!Changed)
485 return PreservedAnalyses::all();
486
488}
489
490bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
491 AtomicOrdering Order) {
492 ReplacementIRBuilder Builder(I, *DL);
493
494 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
495
496 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
497 // We have a guard here because not every atomic operation generates a
498 // trailing fence.
499 if (TrailingFence)
500 TrailingFence->moveAfter(I);
501
502 return (LeadingFence || TrailingFence);
503}
504
505/// Get the iX type with the same bitwidth as T.
507AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
508 EVT VT = TLI->getMemValueType(DL, T);
509 unsigned BitWidth = VT.getStoreSizeInBits();
510 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
511 return IntegerType::get(T->getContext(), BitWidth);
512}
513
514/// Convert an atomic load of a non-integral type to an integer load of the
515/// equivalent bitwidth. See the function comment on
516/// convertAtomicStoreToIntegerType for background.
517LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
518 auto *M = LI->getModule();
519 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
520
521 ReplacementIRBuilder Builder(LI, *DL);
522
523 Value *Addr = LI->getPointerOperand();
524
525 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
526 NewLI->setAlignment(LI->getAlign());
527 NewLI->setVolatile(LI->isVolatile());
528 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
529 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
530
531 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
532 LI->replaceAllUsesWith(NewVal);
533 LI->eraseFromParent();
534 return NewLI;
535}
536
537AtomicRMWInst *
538AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
540
541 auto *M = RMWI->getModule();
542 Type *NewTy =
543 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
544
545 ReplacementIRBuilder Builder(RMWI, *DL);
546
547 Value *Addr = RMWI->getPointerOperand();
548 Value *Val = RMWI->getValOperand();
549 Value *NewVal = Val->getType()->isPointerTy()
550 ? Builder.CreatePtrToInt(Val, NewTy)
551 : Builder.CreateBitCast(Val, NewTy);
552
553 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
554 RMWI->getAlign(), RMWI->getOrdering(),
555 RMWI->getSyncScopeID());
556 NewRMWI->setVolatile(RMWI->isVolatile());
557 copyMetadataForAtomic(*NewRMWI, *RMWI);
558 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
559
560 Value *NewRVal = RMWI->getType()->isPointerTy()
561 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
562 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
563 RMWI->replaceAllUsesWith(NewRVal);
564 RMWI->eraseFromParent();
565 return NewRMWI;
566}
567
568bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
569 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
570 case TargetLoweringBase::AtomicExpansionKind::None:
571 return false;
572 case TargetLoweringBase::AtomicExpansionKind::LLSC:
573 expandAtomicOpToLLSC(
574 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
575 LI->getOrdering(),
576 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
577 return true;
578 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
579 return expandAtomicLoadToLL(LI);
580 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
581 return expandAtomicLoadToCmpXchg(LI);
582 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
583 LI->setAtomic(AtomicOrdering::NotAtomic);
584 return true;
585 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
586 TLI->emitExpandAtomicLoad(LI);
587 return true;
588 default:
589 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
590 }
591}
592
593bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
594 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
595 case TargetLoweringBase::AtomicExpansionKind::None:
596 return false;
597 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
598 TLI->emitExpandAtomicStore(SI);
599 return true;
600 case TargetLoweringBase::AtomicExpansionKind::Expand:
601 expandAtomicStoreToXChg(SI);
602 return true;
603 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
604 SI->setAtomic(AtomicOrdering::NotAtomic);
605 return true;
606 default:
607 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
608 }
609}
610
611bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
612 ReplacementIRBuilder Builder(LI, *DL);
613
614 // On some architectures, load-linked instructions are atomic for larger
615 // sizes than normal loads. For example, the only 64-bit load guaranteed
616 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
617 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
618 LI->getPointerOperand(), LI->getOrdering());
620
621 LI->replaceAllUsesWith(Val);
622 LI->eraseFromParent();
623
624 return true;
625}
626
627bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
628 ReplacementIRBuilder Builder(LI, *DL);
629 AtomicOrdering Order = LI->getOrdering();
630 if (Order == AtomicOrdering::Unordered)
631 Order = AtomicOrdering::Monotonic;
632
633 Value *Addr = LI->getPointerOperand();
634 Type *Ty = LI->getType();
635 Constant *DummyVal = Constant::getNullValue(Ty);
636
637 Value *Pair = Builder.CreateAtomicCmpXchg(
638 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
640 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
641
642 LI->replaceAllUsesWith(Loaded);
643 LI->eraseFromParent();
644
645 return true;
646}
647
648/// Convert an atomic store of a non-integral type to an integer store of the
649/// equivalent bitwidth. We used to not support floating point or vector
650/// atomics in the IR at all. The backends learned to deal with the bitcast
651/// idiom because that was the only way of expressing the notion of a atomic
652/// float or vector store. The long term plan is to teach each backend to
653/// instruction select from the original atomic store, but as a migration
654/// mechanism, we convert back to the old format which the backends understand.
655/// Each backend will need individual work to recognize the new format.
656StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
657 ReplacementIRBuilder Builder(SI, *DL);
658 auto *M = SI->getModule();
659 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
660 M->getDataLayout());
661 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
662
663 Value *Addr = SI->getPointerOperand();
664
665 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
666 NewSI->setAlignment(SI->getAlign());
667 NewSI->setVolatile(SI->isVolatile());
668 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
669 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
670 SI->eraseFromParent();
671 return NewSI;
672}
673
674void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
675 // This function is only called on atomic stores that are too large to be
676 // atomic if implemented as a native store. So we replace them by an
677 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
678 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
679 // It is the responsibility of the target to only signal expansion via
680 // shouldExpandAtomicRMW in cases where this is required and possible.
681 ReplacementIRBuilder Builder(SI, *DL);
682 AtomicOrdering Ordering = SI->getOrdering();
683 assert(Ordering != AtomicOrdering::NotAtomic);
684 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
685 ? AtomicOrdering::Monotonic
686 : Ordering;
687 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
688 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
689 SI->getAlign(), RMWOrdering);
690 SI->eraseFromParent();
691
692 // Now we have an appropriate swap instruction, lower it as usual.
693 tryExpandAtomicRMW(AI);
694}
695
696static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
697 Value *Loaded, Value *NewVal, Align AddrAlign,
698 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
699 Value *&Success, Value *&NewLoaded,
700 Instruction *MetadataSrc) {
701 Type *OrigTy = NewVal->getType();
702
703 // This code can go away when cmpxchg supports FP and vector types.
704 assert(!OrigTy->isPointerTy());
705 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
706 if (NeedBitcast) {
707 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
708 NewVal = Builder.CreateBitCast(NewVal, IntTy);
709 Loaded = Builder.CreateBitCast(Loaded, IntTy);
710 }
711
712 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
713 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
715 if (MetadataSrc)
716 copyMetadataForAtomic(*Pair, *MetadataSrc);
717
718 Success = Builder.CreateExtractValue(Pair, 1, "success");
719 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
720
721 if (NeedBitcast)
722 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
723}
724
725bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
726 LLVMContext &Ctx = AI->getModule()->getContext();
727 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
728 switch (Kind) {
729 case TargetLoweringBase::AtomicExpansionKind::None:
730 return false;
731 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
732 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
733 unsigned ValueSize = getAtomicOpSize(AI);
734 if (ValueSize < MinCASSize) {
735 expandPartwordAtomicRMW(AI,
736 TargetLoweringBase::AtomicExpansionKind::LLSC);
737 } else {
738 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
739 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
740 AI->getValOperand());
741 };
742 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
743 AI->getAlign(), AI->getOrdering(), PerformOp);
744 }
745 return true;
746 }
747 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
748 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
749 unsigned ValueSize = getAtomicOpSize(AI);
750 if (ValueSize < MinCASSize) {
751 expandPartwordAtomicRMW(AI,
752 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
753 } else {
755 Ctx.getSyncScopeNames(SSNs);
756 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
757 ? "system"
758 : SSNs[AI->getSyncScopeID()];
759 OptimizationRemarkEmitter ORE(AI->getFunction());
760 ORE.emit([&]() {
761 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
762 << "A compare and swap loop was generated for an atomic "
763 << AI->getOperationName(AI->getOperation()) << " operation at "
764 << MemScope << " memory scope";
765 });
766 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
767 }
768 return true;
769 }
770 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
771 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
772 unsigned ValueSize = getAtomicOpSize(AI);
773 if (ValueSize < MinCASSize) {
775 // Widen And/Or/Xor and give the target another chance at expanding it.
778 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
779 return true;
780 }
781 }
782 expandAtomicRMWToMaskedIntrinsic(AI);
783 return true;
784 }
785 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
787 return true;
788 }
789 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
791 return true;
792 }
793 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
794 return lowerAtomicRMWInst(AI);
795 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
796 TLI->emitExpandAtomicRMW(AI);
797 return true;
798 default:
799 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
800 }
801}
802
803namespace {
804
struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;     // Wide word-sized type the memory op uses.
  Type *ValueType = nullptr;    // Original type of the partword value.
  Type *IntValueType = nullptr; // Integer twin of ValueType, same bit width.
  Value *AlignedAddr = nullptr; // Address rounded down to a word boundary.
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr; // Bit offset of the value within the wide word.
  Value *Mask = nullptr;     // Selects the value's bits in the wide word.
  Value *Inv_Mask = nullptr; // Complement of Mask.
};
817
818[[maybe_unused]]
819raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
820 auto PrintObj = [&O](auto *V) {
821 if (V)
822 O << *V;
823 else
824 O << "nullptr";
825 O << '\n';
826 };
827 O << "PartwordMaskValues {\n";
828 O << " WordType: ";
829 PrintObj(PMV.WordType);
830 O << " ValueType: ";
831 PrintObj(PMV.ValueType);
832 O << " AlignedAddr: ";
833 PrintObj(PMV.AlignedAddr);
834 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
835 O << " ShiftAmt: ";
836 PrintObj(PMV.ShiftAmt);
837 O << " Mask: ";
838 PrintObj(PMV.Mask);
839 O << " Inv_Mask: ";
840 PrintObj(PMV.Inv_Mask);
841 O << "}\n";
842 return O;
843}
844
845} // end anonymous namespace
846
847/// This is a helper function which builds instructions to provide
848/// values necessary for partword atomic operations. It takes an
849/// incoming address, Addr, and ValueType, and constructs the address,
850/// shift-amounts and masks needed to work with a larger value of size
851/// WordSize.
852///
853/// AlignedAddr: Addr rounded down to a multiple of WordSize
854///
855/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
856/// from AlignAddr for it to have the same value as if
857/// ValueType was loaded from Addr.
858///
859/// Mask: Value to mask with the value loaded from AlignAddr to
860/// include only the part that would've been loaded from Addr.
861///
862/// Inv_Mask: The inverse of Mask.
863static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
865 Value *Addr, Align AddrAlign,
866 unsigned MinWordSize) {
867 PartwordMaskValues PMV;
868
869 Module *M = I->getModule();
870 LLVMContext &Ctx = M->getContext();
871 const DataLayout &DL = M->getDataLayout();
872 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
873
874 PMV.ValueType = PMV.IntValueType = ValueType;
875 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
876 PMV.IntValueType =
877 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
878
879 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
880 : ValueType;
881 if (PMV.ValueType == PMV.WordType) {
882 PMV.AlignedAddr = Addr;
883 PMV.AlignedAddrAlignment = AddrAlign;
884 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
885 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
886 return PMV;
887 }
888
889 PMV.AlignedAddrAlignment = Align(MinWordSize);
890
891 assert(ValueSize < MinWordSize);
892
893 PointerType *PtrTy = cast<PointerType>(Addr->getType());
894 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
895 Value *PtrLSB;
896
897 if (AddrAlign < MinWordSize) {
898 PMV.AlignedAddr = Builder.CreateIntrinsic(
899 Intrinsic::ptrmask, {PtrTy, IntTy},
900 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
901 nullptr, "AlignedAddr");
902
903 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
904 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
905 } else {
906 // If the alignment is high enough, the LSB are known 0.
907 PMV.AlignedAddr = Addr;
908 PtrLSB = ConstantInt::getNullValue(IntTy);
909 }
910
911 if (DL.isLittleEndian()) {
912 // turn bytes into bits
913 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
914 } else {
915 // turn bytes into bits, and count from the other side.
916 PMV.ShiftAmt = Builder.CreateShl(
917 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
918 }
919
920 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
921 PMV.Mask = Builder.CreateShl(
922 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
923 "Mask");
924
925 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
926
927 return PMV;
928}
929
930static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
931 const PartwordMaskValues &PMV) {
932 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
933 if (PMV.WordType == PMV.ValueType)
934 return WideWord;
935
936 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
937 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
938 return Builder.CreateBitCast(Trunc, PMV.ValueType);
939}
940
941static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
942 Value *Updated, const PartwordMaskValues &PMV) {
943 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
944 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
945 if (PMV.WordType == PMV.ValueType)
946 return Updated;
947
948 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
949
950 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
951 Value *Shift =
952 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
953 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
954 Value *Or = Builder.CreateOr(And, Shift, "inserted");
955 return Or;
956}
957
/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
///
/// \p Loaded is the full word currently at the aligned address,
/// \p Shifted_Inc is the RMW operand pre-shifted into its lane within the
/// word, and \p Inc is the original, unshifted operand. The result is the
/// full word value that should be stored back.
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    // Xchg: replace the masked lane with the (already shifted) new value,
    // preserving the bits outside the mask from the loaded word.
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
    // Pure bitwise ops never reach this function; they are widened to a
    // full-word atomicrmw beforehand.
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    // These can be computed on the shifted operand directly; any bits that
    // escape the lane are stripped by the AND with PMV.Mask afterwards.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}
1016
/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  // Compute the aligned word address plus the shift amount and masks for the
  // sub-word lane.
  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
    // Pre-shift the operand into its lane for ops that can work on the
    // shifted representation directly.
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  // Loop body callback: apply the masked operation to the loaded full word.
  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(
        Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
        MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  // The loop yields the full word; extract the sub-word result and replace
  // the original instruction.
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
1072
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
// Returns the new, word-sized atomicrmw that replaces \p AI.
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

             Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  // Compute the aligned word address plus shift/mask values for the lane.
  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Move the operand into its lane within the word.
  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  // Zero bits outside the lane are a no-op for Or/Xor, but would clear the
  // rest of the word for And; set them to one in that case instead.
  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  // Emit the widened, word-sized atomicrmw on the aligned address with the
  // original ordering and scope.
  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  // The widened op returns the whole word; extract the original lane for the
  // users of the old instruction.
  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
1109
/// Expand a sub-word cmpxchg into a word-sized cmpxchg with a retry loop.
/// Always returns true (the instruction is replaced).
bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %25, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  // A weak cmpxchg may fail spuriously, so it needs no retry loop.
  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Extract the sub-word old value and rebuild the { value, success } pair
  // in the original narrow type.
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}
1234
1235void AtomicExpandImpl::expandAtomicOpToLLSC(
1236 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1237 AtomicOrdering MemOpOrder,
1238 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1239 ReplacementIRBuilder Builder(I, *DL);
1240 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1241 MemOpOrder, PerformOp);
1242
1243 I->replaceAllUsesWith(Loaded);
1244 I->eraseFromParent();
1245}
1246
1247void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1248 ReplacementIRBuilder Builder(AI, *DL);
1249
1250 PartwordMaskValues PMV =
1251 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1252 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1253
1254 // The value operand must be sign-extended for signed min/max so that the
1255 // target's signed comparison instructions can be used. Otherwise, just
1256 // zero-ext.
1257 Instruction::CastOps CastOp = Instruction::ZExt;
1258 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1259 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1260 CastOp = Instruction::SExt;
1261
1262 Value *ValOperand_Shifted = Builder.CreateShl(
1263 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1264 PMV.ShiftAmt, "ValOperand_Shifted");
1265 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1266 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1267 AI->getOrdering());
1268 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1269 AI->replaceAllUsesWith(FinalOldResult);
1270 AI->eraseFromParent();
1271}
1272
/// Expand a sub-word cmpxchg via the target's masked-cmpxchg intrinsic hook,
/// which operates on the containing word.
void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  // Aligned word address, shift amount, and masks for the sub-word lane.
  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the expected and new values into their lane within the word.
  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  // Rebuild the { oldval, success } result. Success is recomputed by
  // comparing the masked lane of the returned word against the shifted
  // expected value.
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}
1300
/// Emit a load-linked/store-conditional retry loop that applies \p PerformOp
/// to the loaded value, and return the value observed before the successful
/// store-conditional.
Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  // A non-zero status from the store-conditional means it failed: retry.
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");

  Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
  // hard to predict precise branch weigths we mark the branch as "unknown"
  // (50/50) to prevent misleading optimizations.

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
1355
1356/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1357/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1358/// IR. As a migration step, we convert back to what use to be the standard
1359/// way to represent a pointer cmpxchg so that we can update backends one by
1360/// one.
1361AtomicCmpXchgInst *
1362AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1363 auto *M = CI->getModule();
1364 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1365 M->getDataLayout());
1366
1367 ReplacementIRBuilder Builder(CI, *DL);
1368
1369 Value *Addr = CI->getPointerOperand();
1370
1371 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1372 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1373
1374 auto *NewCI = Builder.CreateAtomicCmpXchg(
1375 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1376 CI->getFailureOrdering(), CI->getSyncScopeID());
1377 NewCI->setVolatile(CI->isVolatile());
1378 NewCI->setWeak(CI->isWeak());
1379 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1380
1381 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1382 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1383
1384 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1385
1386 Value *Res = PoisonValue::get(CI->getType());
1387 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1388 Res = Builder.CreateInsertValue(Res, Succ, 1);
1389
1390 CI->replaceAllUsesWith(Res);
1391 CI->eraseFromParent();
1392 return NewCI;
1393}
1394
/// Full LL/SC-based expansion of a cmpxchg, including the optional sunk
/// release barrier and the CFG that distinguishes success from failure.
/// Always returns true (the instruction is replaced).
bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.releasingstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
                       MDBuilder(F->getContext()).createLikelyBranchWeights());

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  // Store-conditional reports success with a zero status.
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB,
                       MDBuilder(F->getContext()).createLikelyBranchWeights());

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    // Second copy of the load-linked, executed after the release barrier.
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(
        ShouldStore, TryStoreBB, NoStoreBB,
        MDBuilder(F->getContext()).createLikelyBranchWeights());
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
1644
1645bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1646 // TODO: Add floating point support.
1647 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1648 if (!C)
1649 return false;
1650
1651 switch (RMWI->getOperation()) {
1652 case AtomicRMWInst::Add:
1653 case AtomicRMWInst::Sub:
1654 case AtomicRMWInst::Or:
1655 case AtomicRMWInst::Xor:
1656 return C->isZero();
1657 case AtomicRMWInst::And:
1658 return C->isMinusOne();
1659 case AtomicRMWInst::Min:
1660 return C->isMaxValue(true);
1661 case AtomicRMWInst::Max:
1662 return C->isMinValue(true);
1664 return C->isMaxValue(false);
1666 return C->isMinValue(false);
1667 default:
1668 return false;
1669 }
1670}
1671
1672bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1673 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1674 tryExpandAtomicLoad(ResultingLoad);
1675 return true;
1676 }
1677 return false;
1678}
1679
1680Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1681 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1682 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1683 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1684 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1685 LLVMContext &Ctx = Builder.getContext();
1686 BasicBlock *BB = Builder.GetInsertBlock();
1687 Function *F = BB->getParent();
1688
1689 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1690 //
1691 // The standard expansion we produce is:
1692 // [...]
1693 // %init_loaded = load atomic iN* %addr
1694 // br label %loop
1695 // loop:
1696 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1697 // %new = some_op iN %loaded, %incr
1698 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1699 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1700 // %success = extractvalue { iN, i1 } %pair, 1
1701 // br i1 %success, label %atomicrmw.end, label %loop
1702 // atomicrmw.end:
1703 // [...]
1704 BasicBlock *ExitBB =
1705 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1706 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1707
1708 // The split call above "helpfully" added a branch at the end of BB (to the
1709 // wrong place), but we want a load. It's easiest to just remove
1710 // the branch entirely.
1711 std::prev(BB->end())->eraseFromParent();
1712 Builder.SetInsertPoint(BB);
1713 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1714 // TODO: The initial load must be atomic with the same synchronization scope
1715 // to avoid a data race with concurrent stores. If the instruction being
1716 // emulated is volatile, issue a volatile load.
1717 Builder.CreateBr(LoopBB);
1718
1719 // Start the main loop block now that we've taken care of the preliminaries.
1720 Builder.SetInsertPoint(LoopBB);
1721 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1722 Loaded->addIncoming(InitLoaded, BB);
1723
1724 Value *NewVal = PerformOp(Builder, Loaded);
1725
1726 Value *NewLoaded = nullptr;
1727 Value *Success = nullptr;
1728
1729 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1730 MemOpOrder == AtomicOrdering::Unordered
1731 ? AtomicOrdering::Monotonic
1732 : MemOpOrder,
1733 SSID, Success, NewLoaded, MetadataSrc);
1734 assert(Success && NewLoaded);
1735
1736 Loaded->addIncoming(NewLoaded, LoopBB);
1737
1738 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1739
1740 // Atomic RMW expands to a cmpxchg loop, Since precise branch weights
1741 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1742 // to prevent misleading optimizations.
1744
1745 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1746 return NewLoaded;
1747}
1748
1749bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1750 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1751 unsigned ValueSize = getAtomicOpSize(CI);
1752
1753 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1754 default:
1755 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1756 case TargetLoweringBase::AtomicExpansionKind::None:
1757 if (ValueSize < MinCASSize)
1758 return expandPartwordCmpXchg(CI);
1759 return false;
1760 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1761 return expandAtomicCmpXchg(CI);
1762 }
1763 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1764 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1765 return true;
1766 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1767 return lowerAtomicCmpXchgInst(CI);
1768 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1769 TLI->emitExpandAtomicCmpXchg(CI);
1770 return true;
1771 }
1772 }
1773}
1774
1775bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1776 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1777 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1778 Builder.setIsFPConstrained(
1779 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1780
1781 // FIXME: If FP exceptions are observable, we should force them off for the
1782 // loop for the FP atomics.
1783 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1784 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1785 AI->getOrdering(), AI->getSyncScopeID(),
1786 [&](IRBuilderBase &Builder, Value *Loaded) {
1787 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1788 AI->getValOperand());
1789 },
1790 CreateCmpXchg, /*MetadataSrc=*/AI);
1791
1792 AI->replaceAllUsesWith(Loaded);
1793 AI->eraseFromParent();
1794 return true;
1795}
1796
1797// In order to use one of the sized library calls such as
1798// __atomic_fetch_add_4, the alignment must be sufficient, the size
1799// must be one of the potentially-specialized sizes, and the value
1800// type must actually exist in C on the target (otherwise, the
1801// function wouldn't actually be defined.)
1802static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1803 const DataLayout &DL) {
1804 // TODO: "LargestSize" is an approximation for "largest type that
1805 // you can express in C". It seems to be the case that int128 is
1806 // supported on all 64-bit platforms, otherwise only up to 64-bit
1807 // integers are supported. If we get this wrong, then we'll try to
1808 // call a sized libcall that doesn't actually exist. There should
1809 // really be some more reliable way in LLVM of determining integer
1810 // sizes which are valid in the target's C ABI...
1811 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1812 return Alignment >= Size &&
1813 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1814 Size <= LargestSize;
1815}
1816
1817void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1818 static const RTLIB::Libcall Libcalls[6] = {
1819 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1820 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1821 unsigned Size = getAtomicOpSize(I);
1822
1823 bool expanded = expandAtomicOpToLibcall(
1824 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1825 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1826 if (!expanded)
1827 handleFailure(*I, "unsupported atomic load");
1828}
1829
1830void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1831 static const RTLIB::Libcall Libcalls[6] = {
1832 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1833 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1834 unsigned Size = getAtomicOpSize(I);
1835
1836 bool expanded = expandAtomicOpToLibcall(
1837 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1838 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1839 if (!expanded)
1840 handleFailure(*I, "unsupported atomic store");
1841}
1842
1843void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1844 static const RTLIB::Libcall Libcalls[6] = {
1845 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1846 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1847 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1848 unsigned Size = getAtomicOpSize(I);
1849
1850 bool expanded = expandAtomicOpToLibcall(
1851 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1852 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1853 Libcalls);
1854 if (!expanded)
1855 handleFailure(*I, "unsupported cmpxchg");
1856}
1857
1859 static const RTLIB::Libcall LibcallsXchg[6] = {
1860 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1861 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1862 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1863 static const RTLIB::Libcall LibcallsAdd[6] = {
1864 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1865 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1866 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1867 static const RTLIB::Libcall LibcallsSub[6] = {
1868 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1869 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1870 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1871 static const RTLIB::Libcall LibcallsAnd[6] = {
1872 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1873 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1874 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1875 static const RTLIB::Libcall LibcallsOr[6] = {
1876 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1877 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1878 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1879 static const RTLIB::Libcall LibcallsXor[6] = {
1880 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1881 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1882 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1883 static const RTLIB::Libcall LibcallsNand[6] = {
1884 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1885 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1886 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1887
1888 switch (Op) {
1890 llvm_unreachable("Should not have BAD_BINOP.");
1892 return ArrayRef(LibcallsXchg);
1893 case AtomicRMWInst::Add:
1894 return ArrayRef(LibcallsAdd);
1895 case AtomicRMWInst::Sub:
1896 return ArrayRef(LibcallsSub);
1897 case AtomicRMWInst::And:
1898 return ArrayRef(LibcallsAnd);
1899 case AtomicRMWInst::Or:
1900 return ArrayRef(LibcallsOr);
1901 case AtomicRMWInst::Xor:
1902 return ArrayRef(LibcallsXor);
1904 return ArrayRef(LibcallsNand);
1905 case AtomicRMWInst::Max:
1906 case AtomicRMWInst::Min:
1921 // No atomic libcalls are available for these.
1922 return {};
1923 }
1924 llvm_unreachable("Unexpected AtomicRMW operation.");
1925}
1926
1927void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1928 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1929
1930 unsigned Size = getAtomicOpSize(I);
1931
1932 bool Success = false;
1933 if (!Libcalls.empty())
1934 Success = expandAtomicOpToLibcall(
1935 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1936 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1937
1938 // The expansion failed: either there were no libcalls at all for
1939 // the operation (min/max), or there were only size-specialized
1940 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1941 // CAS libcall, via a CAS loop, instead.
1942 if (!Success) {
1943 expandAtomicRMWToCmpXchg(
1944 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1945 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1946 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1947 Instruction *MetadataSrc) {
1948 // Create the CAS instruction normally...
1949 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1950 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1952 if (MetadataSrc)
1953 copyMetadataForAtomic(*Pair, *MetadataSrc);
1954
1955 Success = Builder.CreateExtractValue(Pair, 1, "success");
1956 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1957
1958 // ...and then expand the CAS into a libcall.
1959 expandAtomicCASToLibcall(Pair);
1960 });
1961 }
1962}
1963
1964// A helper routine for the above expandAtomic*ToLibcall functions.
1965//
1966// 'Libcalls' contains an array of enum values for the particular
1967// ATOMIC libcalls to be emitted. All of the other arguments besides
1968// 'I' are extracted from the Instruction subclass by the
1969// caller. Depending on the particular call, some will be null.
1970bool AtomicExpandImpl::expandAtomicOpToLibcall(
1971 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1972 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1973 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1974 assert(Libcalls.size() == 6);
1975
1976 LLVMContext &Ctx = I->getContext();
1977 Module *M = I->getModule();
1978 const DataLayout &DL = M->getDataLayout();
1979 IRBuilder<> Builder(I);
1980 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1981
1982 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1983 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1984
1985 if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
1986 Size == 16) {
1987 // x86_64 Windows passes i128 as an XMM vector; on return, it is in
1988 // XMM0, and as a parameter, it is passed indirectly. The generic lowering
1989 // rules handles this correctly if we pass it as a v2i64 rather than
1990 // i128. This is what Clang does in the frontend for such types as well
1991 // (see WinX86_64ABIInfo::classify in Clang).
1992 SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
1993 }
1994
1995 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1996
1997 // TODO: the "order" argument type is "int", not int32. So
1998 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1999 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
2000 Constant *OrderingVal =
2001 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
2002 Constant *Ordering2Val = nullptr;
2003 if (CASExpected) {
2004 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
2005 Ordering2Val =
2006 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
2007 }
2008 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
2009
2010 RTLIB::Libcall RTLibType;
2011 if (UseSizedLibcall) {
2012 switch (Size) {
2013 case 1:
2014 RTLibType = Libcalls[1];
2015 break;
2016 case 2:
2017 RTLibType = Libcalls[2];
2018 break;
2019 case 4:
2020 RTLibType = Libcalls[3];
2021 break;
2022 case 8:
2023 RTLibType = Libcalls[4];
2024 break;
2025 case 16:
2026 RTLibType = Libcalls[5];
2027 break;
2028 }
2029 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
2030 RTLibType = Libcalls[0];
2031 } else {
2032 // Can't use sized function, and there's no generic for this
2033 // operation, so give up.
2034 return false;
2035 }
2036
2037 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
2038 if (LibcallImpl == RTLIB::Unsupported) {
2039 // This target does not implement the requested atomic libcall so give up.
2040 return false;
2041 }
2042
2043 // Build up the function call. There's two kinds. First, the sized
2044 // variants. These calls are going to be one of the following (with
2045 // N=1,2,4,8,16):
2046 // iN __atomic_load_N(iN *ptr, int ordering)
2047 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2048 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2049 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2050 // int success_order, int failure_order)
2051 //
2052 // Note that these functions can be used for non-integer atomic
2053 // operations, the values just need to be bitcast to integers on the
2054 // way in and out.
2055 //
2056 // And, then, the generic variants. They look like the following:
2057 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2058 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2059 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2060 // int ordering)
2061 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2062 // void *desired, int success_order,
2063 // int failure_order)
2064 //
2065 // The different signatures are built up depending on the
2066 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2067 // variables.
2068
2069 AllocaInst *AllocaCASExpected = nullptr;
2070 AllocaInst *AllocaValue = nullptr;
2071 AllocaInst *AllocaResult = nullptr;
2072
2073 Type *ResultTy;
2075 AttributeList Attr;
2076
2077 // 'size' argument.
2078 if (!UseSizedLibcall) {
2079 // Note, getIntPtrType is assumed equivalent to size_t.
2080 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2081 }
2082
2083 // 'ptr' argument.
2084 // note: This assumes all address spaces share a common libfunc
2085 // implementation and that addresses are convertable. For systems without
2086 // that property, we'd need to extend this mechanism to support AS-specific
2087 // families of atomic intrinsics.
2088 Value *PtrVal = PointerOperand;
2089 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2090 Args.push_back(PtrVal);
2091
2092 // 'expected' argument, if present.
2093 if (CASExpected) {
2094 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2095 AllocaCASExpected->setAlignment(AllocaAlignment);
2096 Builder.CreateLifetimeStart(AllocaCASExpected);
2097 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2098 Args.push_back(AllocaCASExpected);
2099 }
2100
2101 // 'val' argument ('desired' for cas), if present.
2102 if (ValueOperand) {
2103 if (UseSizedLibcall) {
2104 Value *IntValue =
2105 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2106 Args.push_back(IntValue);
2107 } else {
2108 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2109 AllocaValue->setAlignment(AllocaAlignment);
2110 Builder.CreateLifetimeStart(AllocaValue);
2111 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2112 Args.push_back(AllocaValue);
2113 }
2114 }
2115
2116 // 'ret' argument.
2117 if (!CASExpected && HasResult && !UseSizedLibcall) {
2118 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2119 AllocaResult->setAlignment(AllocaAlignment);
2120 Builder.CreateLifetimeStart(AllocaResult);
2121 Args.push_back(AllocaResult);
2122 }
2123
2124 // 'ordering' ('success_order' for cas) argument.
2125 Args.push_back(OrderingVal);
2126
2127 // 'failure_order' argument, if present.
2128 if (Ordering2Val)
2129 Args.push_back(Ordering2Val);
2130
2131 // Now, the return type.
2132 if (CASExpected) {
2133 ResultTy = Type::getInt1Ty(Ctx);
2134 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2135 } else if (HasResult && UseSizedLibcall)
2136 ResultTy = SizedIntTy;
2137 else
2138 ResultTy = Type::getVoidTy(Ctx);
2139
2140 // Done with setting up arguments and return types, create the call:
2142 for (Value *Arg : Args)
2143 ArgTys.push_back(Arg->getType());
2144 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2145 FunctionCallee LibcallFn = M->getOrInsertFunction(
2147 Attr);
2148 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2149 Call->setAttributes(Attr);
2150 Value *Result = Call;
2151
2152 // And then, extract the results...
2153 if (ValueOperand && !UseSizedLibcall)
2154 Builder.CreateLifetimeEnd(AllocaValue);
2155
2156 if (CASExpected) {
2157 // The final result from the CAS is {load of 'expected' alloca, bool result
2158 // from call}
2159 Type *FinalResultTy = I->getType();
2160 Value *V = PoisonValue::get(FinalResultTy);
2161 Value *ExpectedOut = Builder.CreateAlignedLoad(
2162 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2163 Builder.CreateLifetimeEnd(AllocaCASExpected);
2164 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2165 V = Builder.CreateInsertValue(V, Result, 1);
2167 } else if (HasResult) {
2168 Value *V;
2169 if (UseSizedLibcall)
2170 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2171 else {
2172 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2173 AllocaAlignment);
2174 Builder.CreateLifetimeEnd(AllocaResult);
2175 }
2177 }
2178 I->eraseFromParent();
2179 return true;
2180}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:477
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:479
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1928
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2638
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1894
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1223
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1365
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2631
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2194
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2233
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2335
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1217
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2281
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2496
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2331
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:351
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1877
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1518
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2077
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1577
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2189
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2510
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1941
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1913
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1599
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2204
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:285
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform an atomicrmw expansion in a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform an atomic store in a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform an atomic load in a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform an atomicrmw whose result is only used by a comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
bool use_empty() const
Definition Value.h:347
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.