1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const DataLayout *DL = nullptr;
67
68private:
69 void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
70 LLVMContext &Ctx = FailedInst.getContext();
71
72 // TODO: Do not use generic error type.
73 Ctx.emitError(&FailedInst, Msg);
74
75 if (!FailedInst.getType()->isVoidTy())
76 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
77 FailedInst.eraseFromParent();
78 }
79
80 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
81 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
82 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
83 bool tryExpandAtomicLoad(LoadInst *LI);
84 bool expandAtomicLoadToLL(LoadInst *LI);
85 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
86 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
87 bool tryExpandAtomicStore(StoreInst *SI);
88 void expandAtomicStoreToXChg(StoreInst *SI);
89 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
90 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
91 Value *
92 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
93 Align AddrAlign, AtomicOrdering MemOpOrder,
94 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
95 void expandAtomicOpToLLSC(
96 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
97 AtomicOrdering MemOpOrder,
98 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
99 void expandPartwordAtomicRMW(
100 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
101 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
102 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
103 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
104 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
105
106 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
107 static Value *insertRMWCmpXchgLoop(
108 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
109 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
110 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
111 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
112 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
113
114 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
115 bool isIdempotentRMW(AtomicRMWInst *RMWI);
116 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
117
118 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
119 Value *PointerOperand, Value *ValueOperand,
120 Value *CASExpected, AtomicOrdering Ordering,
121 AtomicOrdering Ordering2,
122 ArrayRef<RTLIB::Libcall> Libcalls);
123 void expandAtomicLoadToLibcall(LoadInst *LI);
124 void expandAtomicStoreToLibcall(StoreInst *LI);
125 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
126 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
127
128 friend bool
129 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
130 CreateCmpXchgInstFun CreateCmpXchg);
131
132 bool processAtomicInstr(Instruction *I);
133
134public:
135 bool run(Function &F, const TargetMachine *TM);
136};
137
138class AtomicExpandLegacy : public FunctionPass {
139public:
140 static char ID; // Pass identification, replacement for typeid
141
142 AtomicExpandLegacy() : FunctionPass(ID) {
143 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
144 }
145
146 bool runOnFunction(Function &F) override;
147};
148
149// IRBuilder to be used for replacement atomic instructions.
150struct ReplacementIRBuilder
151 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
152 MDNode *MMRAMD = nullptr;
153
154 // Preserves the DebugLoc from I, and preserves still valid metadata.
155 // Enable StrictFP builder mode when appropriate.
156 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
157 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
158 IRBuilderCallbackInserter(
159 [this](Instruction *I) { addMMRAMD(I); })) {
160 SetInsertPoint(I);
161 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
162 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
163 this->setIsFPConstrained(true);
164
165 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
166 }
167
168 void addMMRAMD(Instruction *I) {
169 if (canInstructionHaveMMRAs(*I))
170 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
171 }
172};
173
174} // end anonymous namespace
175
176char AtomicExpandLegacy::ID = 0;
177
178char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
179
181 "Expand Atomic instructions", false, false)
183INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
184 "Expand Atomic instructions", false, false)
185
186// Helper functions to retrieve the size of atomic instructions.
187static unsigned getAtomicOpSize(LoadInst *LI) {
188 const DataLayout &DL = LI->getDataLayout();
189 return DL.getTypeStoreSize(LI->getType());
190}
191
192static unsigned getAtomicOpSize(StoreInst *SI) {
193 const DataLayout &DL = SI->getDataLayout();
194 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
195}
196
197static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
198 const DataLayout &DL = RMWI->getDataLayout();
199 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
200}
201
202static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
203 const DataLayout &DL = CASI->getDataLayout();
204 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
205}
206
207/// Copy metadata that's safe to preserve when widening atomics.
208static void copyMetadataForAtomic(Instruction &Dest,
209 const Instruction &Source) {
210 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
211 Source.getAllMetadata(MD);
212 LLVMContext &Ctx = Dest.getContext();
213 MDBuilder MDB(Ctx);
214
215 for (auto [ID, N] : MD) {
216 switch (ID) {
217 case LLVMContext::MD_dbg:
218 case LLVMContext::MD_tbaa:
219 case LLVMContext::MD_tbaa_struct:
220 case LLVMContext::MD_alias_scope:
221 case LLVMContext::MD_noalias:
222 case LLVMContext::MD_noalias_addrspace:
223 case LLVMContext::MD_access_group:
224 case LLVMContext::MD_mmra:
225 Dest.setMetadata(ID, N);
226 break;
227 default:
228 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
229 Dest.setMetadata(ID, N);
230 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
231 Dest.setMetadata(ID, N);
232
233 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
234 // uses.
235 break;
236 }
237 }
238}
239
240// Determine if a particular atomic operation has a supported size,
241// and is of appropriate alignment, to be passed through for target
242// lowering. (Versus turning into a __atomic libcall)
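//
// For illustration, on a hypothetical target whose MaxAtomicSizeInBitsSupported
// is 64, accesses such as
//   %v = load atomic i64, ptr %p unordered, align 4   ; under-aligned
//   %w = load atomic i128, ptr %q seq_cst, align 16   ; wider than supported
// fail this check and are routed to the __atomic_* libcall path instead.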
243template <typename Inst>
244static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
245 unsigned Size = getAtomicOpSize(I);
246 Align Alignment = I->getAlign();
247 return Alignment >= Size &&
248 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
249}
250
251bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
252 auto *LI = dyn_cast<LoadInst>(I);
253 auto *SI = dyn_cast<StoreInst>(I);
254 auto *RMWI = dyn_cast<AtomicRMWInst>(I);
255 auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
256
257 bool MadeChange = false;
258
259 // If the Size/Alignment is not supported, replace with a libcall.
260 if (LI) {
261 if (!LI->isAtomic())
262 return false;
263
264 if (!atomicSizeSupported(TLI, LI)) {
265 expandAtomicLoadToLibcall(LI);
266 return true;
267 }
268
269 if (TLI->shouldCastAtomicLoadInIR(LI) ==
270 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
271 I = LI = convertAtomicLoadToIntegerType(LI);
272 MadeChange = true;
273 }
274 } else if (SI) {
275 if (!SI->isAtomic())
276 return false;
277
278 if (!atomicSizeSupported(TLI, SI)) {
279 expandAtomicStoreToLibcall(SI);
280 return true;
281 }
282
283 if (TLI->shouldCastAtomicStoreInIR(SI) ==
284 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
285 I = SI = convertAtomicStoreToIntegerType(SI);
286 MadeChange = true;
287 }
288 } else if (RMWI) {
289 if (!atomicSizeSupported(TLI, RMWI)) {
290 expandAtomicRMWToLibcall(RMWI);
291 return true;
292 }
293
294 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
295 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
296 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
297 MadeChange = true;
298 }
299 } else if (CASI) {
300 if (!atomicSizeSupported(TLI, CASI)) {
301 expandAtomicCASToLibcall(CASI);
302 return true;
303 }
304
305 // TODO: when we're ready to make the change at the IR level, we can
306 // extend convertCmpXchgToInteger for floating point too.
307 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
308 // TODO: add a TLI hook to control this so that each target can
309 // convert to lowering the original type one at a time.
310 I = CASI = convertCmpXchgToIntegerType(CASI);
311 MadeChange = true;
312 }
313 } else
314 return false;
315
316 if (TLI->shouldInsertFencesForAtomic(I)) {
317 auto FenceOrdering = AtomicOrdering::Monotonic;
318 if (LI && isAcquireOrStronger(LI->getOrdering())) {
319 FenceOrdering = LI->getOrdering();
320 LI->setOrdering(AtomicOrdering::Monotonic);
321 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
322 FenceOrdering = SI->getOrdering();
323 SI->setOrdering(AtomicOrdering::Monotonic);
324 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
325 isAcquireOrStronger(RMWI->getOrdering()))) {
326 FenceOrdering = RMWI->getOrdering();
327 RMWI->setOrdering(AtomicOrdering::Monotonic);
328 } else if (CASI &&
329 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
330 TargetLoweringBase::AtomicExpansionKind::None &&
331 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
332 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
333 isAcquireOrStronger(CASI->getFailureOrdering()))) {
334 // If a compare and swap is lowered to LL/SC, we can do smarter fence
335 // insertion, with a stronger one on the success path than on the
336 // failure path. As a result, fence insertion is directly done by
337 // expandAtomicCmpXchg in that case.
338 FenceOrdering = CASI->getMergedOrdering();
339 auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI);
340
341 CASI->setSuccessOrdering(CASOrdering);
342 CASI->setFailureOrdering(CASOrdering);
343 }
344
345 if (FenceOrdering != AtomicOrdering::Monotonic) {
346 MadeChange |= bracketInstWithFences(I, FenceOrdering);
347 }
348 } else if (I->hasAtomicStore() &&
349 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
350 auto FenceOrdering = AtomicOrdering::Monotonic;
351 if (SI)
352 FenceOrdering = SI->getOrdering();
353 else if (RMWI)
354 FenceOrdering = RMWI->getOrdering();
355 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
356 TargetLoweringBase::AtomicExpansionKind::LLSC)
357 // LLSC is handled in expandAtomicCmpXchg().
358 FenceOrdering = CASI->getSuccessOrdering();
359
360 IRBuilder Builder(I);
361 if (auto TrailingFence =
362 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
363 TrailingFence->moveAfter(I);
364 MadeChange = true;
365 }
366 }
367
368 if (LI)
369 MadeChange |= tryExpandAtomicLoad(LI);
370 else if (SI)
371 MadeChange |= tryExpandAtomicStore(SI);
372 else if (RMWI) {
373 // There are two different ways of expanding RMW instructions:
374 // - into a load if it is idempotent
375 // - into a Cmpxchg/LL-SC loop otherwise
376 // we try them in that order.
377
378 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
379 MadeChange = true;
380
381 } else {
382 MadeChange |= tryExpandAtomicRMW(RMWI);
383 }
384 } else if (CASI)
385 MadeChange |= tryExpandAtomicCmpXchg(CASI);
386
387 return MadeChange;
388}
389
390bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
391 const auto *Subtarget = TM->getSubtargetImpl(F);
392 if (!Subtarget->enableAtomicExpand())
393 return false;
394 TLI = Subtarget->getTargetLowering();
395 DL = &F.getDataLayout();
396
397 bool MadeChange = false;
398
399 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
400 BasicBlock *BB = &*BBI;
401
403
404 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
405 I = Next) {
406 Instruction &Inst = *I;
407 Next = std::next(I);
408
409 if (processAtomicInstr(&Inst)) {
410 MadeChange = true;
411
412 // New blocks may have been inserted.
413 BBE = F.end();
414 }
415 }
416 }
417
418 return MadeChange;
419}
420
421bool AtomicExpandLegacy::runOnFunction(Function &F) {
422
423 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
424 if (!TPC)
425 return false;
426 auto *TM = &TPC->getTM<TargetMachine>();
427 AtomicExpandImpl AE;
428 return AE.run(F, TM);
429}
430
431FunctionPass *llvm::createAtomicExpandLegacyPass() {
432 return new AtomicExpandLegacy();
433}
434
435PreservedAnalyses AtomicExpandPass::run(Function &F,
436 FunctionAnalysisManager &AM) {
437 AtomicExpandImpl AE;
438
439 bool Changed = AE.run(F, TM);
440 if (!Changed)
441 return PreservedAnalyses::all();
442
443 return PreservedAnalyses::none();
444}
445
446bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
447 AtomicOrdering Order) {
448 ReplacementIRBuilder Builder(I, *DL);
449
450 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
451
452 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
453 // We have a guard here because not every atomic operation generates a
454 // trailing fence.
455 if (TrailingFence)
456 TrailingFence->moveAfter(I);
457
458 return (LeadingFence || TrailingFence);
459}
460
461/// Get the iX type with the same bitwidth as T.
462IntegerType *
463AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
464 EVT VT = TLI->getMemValueType(DL, T);
465 unsigned BitWidth = VT.getStoreSizeInBits();
466 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
467 return IntegerType::get(T->getContext(), BitWidth);
468}
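// For example, the helper above maps float -> i32, double -> i64 and
// <2 x i16> -> i32 (types whose store size equals their size in bits, as the
// assert requires).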
469
470/// Convert an atomic load of a non-integral type to an integer load of the
471/// equivalent bitwidth. See the function comment on
472/// convertAtomicStoreToIntegerType for background.
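///
/// Illustrative sketch for a 32-bit float (value names invented for the
/// example):
///   %v = load atomic float, ptr %p acquire, align 4
/// becomes
///   %v.int = load atomic i32, ptr %p acquire, align 4
///   %v     = bitcast i32 %v.int to float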
473LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
474 auto *M = LI->getModule();
475 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
476
477 ReplacementIRBuilder Builder(LI, *DL);
478
479 Value *Addr = LI->getPointerOperand();
480
481 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
482 NewLI->setAlignment(LI->getAlign());
483 NewLI->setVolatile(LI->isVolatile());
484 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
485 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
486
487 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
488 LI->replaceAllUsesWith(NewVal);
489 LI->eraseFromParent();
490 return NewLI;
491}
492
493AtomicRMWInst *
494AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
496
497 auto *M = RMWI->getModule();
498 Type *NewTy =
499 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
500
501 ReplacementIRBuilder Builder(RMWI, *DL);
502
503 Value *Addr = RMWI->getPointerOperand();
504 Value *Val = RMWI->getValOperand();
505 Value *NewVal = Val->getType()->isPointerTy()
506 ? Builder.CreatePtrToInt(Val, NewTy)
507 : Builder.CreateBitCast(Val, NewTy);
508
509 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
510 RMWI->getAlign(), RMWI->getOrdering(),
511 RMWI->getSyncScopeID());
512 NewRMWI->setVolatile(RMWI->isVolatile());
513 copyMetadataForAtomic(*NewRMWI, *RMWI);
514 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
515
516 Value *NewRVal = RMWI->getType()->isPointerTy()
517 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
518 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
519 RMWI->replaceAllUsesWith(NewRVal);
520 RMWI->eraseFromParent();
521 return NewRMWI;
522}
523
524bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
525 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
526 case TargetLoweringBase::AtomicExpansionKind::None:
527 return false;
528 case TargetLoweringBase::AtomicExpansionKind::LLSC:
529 expandAtomicOpToLLSC(
530 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
531 LI->getOrdering(),
532 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
533 return true;
534 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
535 return expandAtomicLoadToLL(LI);
536 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
537 return expandAtomicLoadToCmpXchg(LI);
538 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
539 LI->setAtomic(AtomicOrdering::NotAtomic);
540 return true;
541 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
542 TLI->emitExpandAtomicLoad(LI);
543 return true;
544 default:
545 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
546 }
547}
548
549bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
550 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
551 case TargetLoweringBase::AtomicExpansionKind::None:
552 return false;
553 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
554 TLI->emitExpandAtomicStore(SI);
555 return true;
556 case TargetLoweringBase::AtomicExpansionKind::Expand:
557 expandAtomicStoreToXChg(SI);
558 return true;
559 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
560 SI->setAtomic(AtomicOrdering::NotAtomic);
561 return true;
562 default:
563 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
564 }
565}
566
567bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
568 ReplacementIRBuilder Builder(LI, *DL);
569
570 // On some architectures, load-linked instructions are atomic for larger
571 // sizes than normal loads. For example, the only 64-bit load guaranteed
572 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
573 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
574 LI->getPointerOperand(), LI->getOrdering());
575 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
576
577 LI->replaceAllUsesWith(Val);
578 LI->eraseFromParent();
579
580 return true;
581}
582
583bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
584 ReplacementIRBuilder Builder(LI, *DL);
585 AtomicOrdering Order = LI->getOrdering();
586 if (Order == AtomicOrdering::Unordered)
587 Order = AtomicOrdering::Monotonic;
588
589 Value *Addr = LI->getPointerOperand();
590 Type *Ty = LI->getType();
591 Constant *DummyVal = Constant::getNullValue(Ty);
592
593 Value *Pair = Builder.CreateAtomicCmpXchg(
594 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
595 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
596 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
597
598 LI->replaceAllUsesWith(Loaded);
599 LI->eraseFromParent();
600
601 return true;
602}
603
604/// Convert an atomic store of a non-integral type to an integer store of the
605/// equivalent bitwidth. We used to not support floating point or vector
606/// atomics in the IR at all. The backends learned to deal with the bitcast
607// idiom because that was the only way of expressing the notion of an atomic
608/// float or vector store. The long term plan is to teach each backend to
609/// instruction select from the original atomic store, but as a migration
610/// mechanism, we convert back to the old format which the backends understand.
611/// Each backend will need individual work to recognize the new format.
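///
/// Illustrative sketch for a 32-bit float (value names invented for the
/// example):
///   store atomic float %f, ptr %p release, align 4
/// becomes
///   %f.int = bitcast float %f to i32
///   store atomic i32 %f.int, ptr %p release, align 4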
612StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
613 ReplacementIRBuilder Builder(SI, *DL);
614 auto *M = SI->getModule();
615 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
616 M->getDataLayout());
617 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
618
619 Value *Addr = SI->getPointerOperand();
620
621 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
622 NewSI->setAlignment(SI->getAlign());
623 NewSI->setVolatile(SI->isVolatile());
624 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
625 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
626 SI->eraseFromParent();
627 return NewSI;
628}
629
630void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
631 // This function is only called on atomic stores that are too large to be
632 // atomic if implemented as a native store. So we replace them by an
633 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
634 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
635 // It is the responsibility of the target to only signal expansion via
636 // shouldExpandAtomicStoreInIR in cases where this is required and possible.
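//
// Illustrative sketch, e.g. for a 64-bit store the target asked to expand
// (value names invented for the example):
//   store atomic i64 %v, ptr %p seq_cst, align 8
// becomes
//   %old = atomicrmw xchg ptr %p, i64 %v seq_cst, align 8
// with %old unused; the swap is then lowered by tryExpandAtomicRMW below.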
637 ReplacementIRBuilder Builder(SI, *DL);
638 AtomicOrdering Ordering = SI->getOrdering();
639 assert(Ordering != AtomicOrdering::NotAtomic);
640 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
641 ? AtomicOrdering::Monotonic
642 : Ordering;
643 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
644 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
645 SI->getAlign(), RMWOrdering);
646 SI->eraseFromParent();
647
648 // Now we have an appropriate swap instruction, lower it as usual.
649 tryExpandAtomicRMW(AI);
650}
651
652static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
653 Value *Loaded, Value *NewVal, Align AddrAlign,
654 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
655 Value *&Success, Value *&NewLoaded,
656 Instruction *MetadataSrc) {
657 Type *OrigTy = NewVal->getType();
658
659 // This code can go away when cmpxchg supports FP and vector types.
660 assert(!OrigTy->isPointerTy());
661 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
662 if (NeedBitcast) {
663 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
664 NewVal = Builder.CreateBitCast(NewVal, IntTy);
665 Loaded = Builder.CreateBitCast(Loaded, IntTy);
666 }
667
668 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
669 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
670 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
671 if (MetadataSrc)
672 copyMetadataForAtomic(*Pair, *MetadataSrc);
673
674 Success = Builder.CreateExtractValue(Pair, 1, "success");
675 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
676
677 if (NeedBitcast)
678 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
679}
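// Illustrative sketch of what one iteration produced by this helper looks like
// for a float RMW expanded via cmpxchg (value names invented, syncscope and
// metadata omitted):
//   %new.i      = bitcast float %new to i32
//   %loaded.i   = bitcast float %loaded to i32
//   %pair       = cmpxchg ptr %addr, i32 %loaded.i, i32 %new.i seq_cst seq_cst
//   %success    = extractvalue { i32, i1 } %pair, 1
//   %newloaded0 = extractvalue { i32, i1 } %pair, 0
//   %newloaded  = bitcast i32 %newloaded0 to float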
680
681bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
682 LLVMContext &Ctx = AI->getModule()->getContext();
683 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
684 switch (Kind) {
685 case TargetLoweringBase::AtomicExpansionKind::None:
686 return false;
687 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
688 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
689 unsigned ValueSize = getAtomicOpSize(AI);
690 if (ValueSize < MinCASSize) {
691 expandPartwordAtomicRMW(AI,
692 TargetLoweringBase::AtomicExpansionKind::LLSC);
693 } else {
694 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
695 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
696 AI->getValOperand());
697 };
698 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
699 AI->getAlign(), AI->getOrdering(), PerformOp);
700 }
701 return true;
702 }
703 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
704 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
705 unsigned ValueSize = getAtomicOpSize(AI);
706 if (ValueSize < MinCASSize) {
707 expandPartwordAtomicRMW(AI,
708 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
709 } else {
710 SmallVector<StringRef> SSNs;
711 Ctx.getSyncScopeNames(SSNs);
712 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
713 ? "system"
714 : SSNs[AI->getSyncScopeID()];
715 OptimizationRemarkEmitter ORE(AI->getFunction());
716 ORE.emit([&]() {
717 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
718 << "A compare and swap loop was generated for an atomic "
719 << AI->getOperationName(AI->getOperation()) << " operation at "
720 << MemScope << " memory scope";
721 });
722 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
723 }
724 return true;
725 }
726 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
727 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
728 unsigned ValueSize = getAtomicOpSize(AI);
729 if (ValueSize < MinCASSize) {
730 AtomicRMWInst::BinOp Op = AI->getOperation();
731 // Widen And/Or/Xor and give the target another chance at expanding it.
732 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
733 Op == AtomicRMWInst::And) {
734 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
735 return true;
736 }
737 }
738 expandAtomicRMWToMaskedIntrinsic(AI);
739 return true;
740 }
741 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
742 TLI->emitBitTestAtomicRMWIntrinsic(AI);
743 return true;
744 }
745 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
746 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
747 return true;
748 }
749 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
750 return lowerAtomicRMWInst(AI);
751 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
752 TLI->emitExpandAtomicRMW(AI);
753 return true;
754 default:
755 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
756 }
757}
758
759namespace {
760
761struct PartwordMaskValues {
762 // These three fields are guaranteed to be set by createMaskInstrs.
763 Type *WordType = nullptr;
764 Type *ValueType = nullptr;
765 Type *IntValueType = nullptr;
766 Value *AlignedAddr = nullptr;
767 Align AlignedAddrAlignment;
768 // The remaining fields can be null.
769 Value *ShiftAmt = nullptr;
770 Value *Mask = nullptr;
771 Value *Inv_Mask = nullptr;
772};
773
774[[maybe_unused]]
775raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
776 auto PrintObj = [&O](auto *V) {
777 if (V)
778 O << *V;
779 else
780 O << "nullptr";
781 O << '\n';
782 };
783 O << "PartwordMaskValues {\n";
784 O << " WordType: ";
785 PrintObj(PMV.WordType);
786 O << " ValueType: ";
787 PrintObj(PMV.ValueType);
788 O << " AlignedAddr: ";
789 PrintObj(PMV.AlignedAddr);
790 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
791 O << " ShiftAmt: ";
792 PrintObj(PMV.ShiftAmt);
793 O << " Mask: ";
794 PrintObj(PMV.Mask);
795 O << " Inv_Mask: ";
796 PrintObj(PMV.Inv_Mask);
797 O << "}\n";
798 return O;
799}
800
801} // end anonymous namespace
802
803/// This is a helper function which builds instructions to provide
804/// values necessary for partword atomic operations. It takes an
805/// incoming address, Addr, and ValueType, and constructs the address,
806/// shift-amounts and masks needed to work with a larger value of size
807/// WordSize.
808///
809/// AlignedAddr: Addr rounded down to a multiple of WordSize
810///
811/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
812/// from AlignAddr for it to have the same value as if
813/// ValueType was loaded from Addr.
814///
815/// Mask: Value to mask with the value loaded from AlignAddr to
816/// include only the part that would've been loaded from Addr.
817///
818/// Inv_Mask: The inverse of Mask.
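///
/// For illustration, on a little-endian target with MinWordSize = 4 and an i8
/// located at Addr = AlignedAddr + 3: ShiftAmt = 24, Mask = 0xff000000 and
/// Inv_Mask = 0x00ffffff.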
819static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
820 Instruction *I, Type *ValueType,
821 Value *Addr, Align AddrAlign,
822 unsigned MinWordSize) {
823 PartwordMaskValues PMV;
824
825 Module *M = I->getModule();
826 LLVMContext &Ctx = M->getContext();
827 const DataLayout &DL = M->getDataLayout();
828 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
829
830 PMV.ValueType = PMV.IntValueType = ValueType;
831 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
832 PMV.IntValueType =
833 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
834
835 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
836 : ValueType;
837 if (PMV.ValueType == PMV.WordType) {
838 PMV.AlignedAddr = Addr;
839 PMV.AlignedAddrAlignment = AddrAlign;
840 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
841 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
842 return PMV;
843 }
844
845 PMV.AlignedAddrAlignment = Align(MinWordSize);
846
847 assert(ValueSize < MinWordSize);
848
849 PointerType *PtrTy = cast<PointerType>(Addr->getType());
850 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
851 Value *PtrLSB;
852
853 if (AddrAlign < MinWordSize) {
854 PMV.AlignedAddr = Builder.CreateIntrinsic(
855 Intrinsic::ptrmask, {PtrTy, IntTy},
856 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
857 "AlignedAddr");
858
859 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
860 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
861 } else {
862 // If the alignment is high enough, the LSB are known 0.
863 PMV.AlignedAddr = Addr;
864 PtrLSB = ConstantInt::getNullValue(IntTy);
865 }
866
867 if (DL.isLittleEndian()) {
868 // turn bytes into bits
869 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
870 } else {
871 // turn bytes into bits, and count from the other side.
872 PMV.ShiftAmt = Builder.CreateShl(
873 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
874 }
875
876 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
877 PMV.Mask = Builder.CreateShl(
878 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
879 "Mask");
880
881 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
882
883 return PMV;
884}
885
886static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
887 const PartwordMaskValues &PMV) {
888 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
889 if (PMV.WordType == PMV.ValueType)
890 return WideWord;
891
892 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
893 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
894 return Builder.CreateBitCast(Trunc, PMV.ValueType);
895}
896
897static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
898 Value *Updated, const PartwordMaskValues &PMV) {
899 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
900 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
901 if (PMV.WordType == PMV.ValueType)
902 return Updated;
903
904 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
905
906 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
907 Value *Shift =
908 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
909 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
910 Value *Or = Builder.CreateOr(And, Shift, "inserted");
911 return Or;
912}
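// Illustrative sketch of the two helpers above for an i8 kept in bits 24..31
// of an i32 word, i.e. ShiftAmt = 24 (value names invented for the example):
//   extract: %shifted = lshr i32 %wide, 24
//            %val     = trunc i32 %shifted to i8
//   insert:  %ext  = zext i8 %upd to i32
//            %shl  = shl nuw i32 %ext, 24
//            %keep = and i32 %wide, 16777215      ; Inv_Mask = 0x00ffffff
//            %word = or i32 %keep, %shl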
913
914/// Emit IR to implement a masked version of a given atomicrmw
915/// operation. (That is, only the bits under the Mask should be
916/// affected by the operation)
917static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
918 IRBuilderBase &Builder, Value *Loaded,
919 Value *Shifted_Inc, Value *Inc,
920 const PartwordMaskValues &PMV) {
921 // TODO: update to use
922 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
923 // to merge bits from two values without requiring PMV.Inv_Mask.
924 switch (Op) {
925 case AtomicRMWInst::Xchg: {
926 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
927 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
928 return FinalVal;
929 }
930 case AtomicRMWInst::Or:
931 case AtomicRMWInst::Xor:
932 case AtomicRMWInst::And:
933 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
934 case AtomicRMWInst::Add:
935 case AtomicRMWInst::Sub:
936 case AtomicRMWInst::Nand: {
937 // The other arithmetic ops need to be masked into place.
938 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
939 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
940 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
941 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
942 return FinalVal;
943 }
944 case AtomicRMWInst::Max:
945 case AtomicRMWInst::Min:
946 case AtomicRMWInst::UMax:
947 case AtomicRMWInst::UMin:
948 case AtomicRMWInst::FAdd:
949 case AtomicRMWInst::FSub:
950 case AtomicRMWInst::FMin:
951 case AtomicRMWInst::FMax:
952 case AtomicRMWInst::FMaximum:
953 case AtomicRMWInst::FMinimum:
954 case AtomicRMWInst::UIncWrap:
955 case AtomicRMWInst::UDecWrap:
956 case AtomicRMWInst::USubCond:
957 case AtomicRMWInst::USubSat: {
958 // Finally, other ops will operate on the full value, so truncate down to
959 // the original size, and expand out again after doing the
960 // operation. Bitcasts will be inserted for FP values.
961 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
962 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
963 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
964 return FinalVal;
965 }
966 default:
967 llvm_unreachable("Unknown atomic op");
968 }
969}
970
971/// Expand a sub-word atomicrmw operation into an appropriate
972/// word-sized operation.
973///
974/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
975/// way as a typical atomicrmw expansion. The only difference here is
976/// that the operation inside of the loop may operate upon only a
977/// part of the value.
978void AtomicExpandImpl::expandPartwordAtomicRMW(
979 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
980 // Widen And/Or/Xor and give the target another chance at expanding it.
981 AtomicRMWInst::BinOp Op = AI->getOperation();
982 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
983 Op == AtomicRMWInst::And) {
984 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
985 return;
986 }
987 AtomicOrdering MemOpOrder = AI->getOrdering();
988 SyncScope::ID SSID = AI->getSyncScopeID();
989
990 ReplacementIRBuilder Builder(AI, *DL);
991
992 PartwordMaskValues PMV =
993 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
994 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
995
996 Value *ValOperand_Shifted = nullptr;
997 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
998 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
999 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
1000 ValOperand_Shifted =
1001 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
1002 "ValOperand_Shifted");
1003 }
1004
1005 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1006 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
1007 AI->getValOperand(), PMV);
1008 };
1009
1010 Value *OldResult;
1011 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1012 OldResult = insertRMWCmpXchgLoop(
1013 Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
1014 MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
1015 } else {
1016 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1017 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1018 PMV.AlignedAddrAlignment, MemOpOrder,
1019 PerformPartwordOp);
1020 }
1021
1022 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1023 AI->replaceAllUsesWith(FinalOldResult);
1024 AI->eraseFromParent();
1025}
1026
1027// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
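// Illustrative sketch, e.g. for an i8 'or' with a 32-bit minimum cmpxchg width
// on a little-endian target where the i8 happens to sit in the low byte (value
// names invented for the example):
//   %old = atomicrmw or ptr %p, i8 %v monotonic, align 1
// becomes roughly
//   %v.w   = zext i8 %v to i32
//   %old.w = atomicrmw or ptr %p.aligned, i32 %v.w monotonic, align 4
//   %old   = trunc i32 %old.w to i8
// For 'and', the widened operand is additionally or'ed with Inv_Mask so that
// the bytes outside the original i8 are left unchanged.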
1028AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1029 ReplacementIRBuilder Builder(AI, *DL);
1030 AtomicRMWInst::BinOp Op = AI->getOperation();
1031
1032 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1033 Op == AtomicRMWInst::And) &&
1034 "Unable to widen operation");
1035
1036 PartwordMaskValues PMV =
1037 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1038 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1039
1040 Value *ValOperand_Shifted =
1041 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1042 PMV.ShiftAmt, "ValOperand_Shifted");
1043
1044 Value *NewOperand;
1045
1046 if (Op == AtomicRMWInst::And)
1047 NewOperand =
1048 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1049 else
1050 NewOperand = ValOperand_Shifted;
1051
1052 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1053 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1054 AI->getOrdering(), AI->getSyncScopeID());
1055
1056 copyMetadataForAtomic(*NewAI, *AI);
1057
1058 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1059 AI->replaceAllUsesWith(FinalOldResult);
1060 AI->eraseFromParent();
1061 return NewAI;
1062}
1063
1064bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1065 // The basic idea here is that we're expanding a cmpxchg of a
1066 // smaller memory size up to a word-sized cmpxchg. To do this, we
1067 // need to add a retry-loop for strong cmpxchg, so that
1068 // modifications to other parts of the word don't cause a spurious
1069 // failure.
1070
1071 // This generates code like the following:
1072 // [[Setup mask values PMV.*]]
1073 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1074 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1075 // %InitLoaded = load i32* %addr
1076 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1077 // br partword.cmpxchg.loop
1078 // partword.cmpxchg.loop:
1079 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1080 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1081 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1082 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1083 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1084 // i32 %FullWord_NewVal success_ordering failure_ordering
1085 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1086 // %Success = extractvalue { i32, i1 } %NewCI, 1
1087 // br i1 %Success, label %partword.cmpxchg.end,
1088 // label %partword.cmpxchg.failure
1089 // partword.cmpxchg.failure:
1090 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1091 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1092 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1093 // label %partword.cmpxchg.end
1094 // partword.cmpxchg.end:
1095 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1096 // %FinalOldVal = trunc i32 %tmp1 to i8
1097 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1098 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1099
1100 Value *Addr = CI->getPointerOperand();
1101 Value *Cmp = CI->getCompareOperand();
1102 Value *NewVal = CI->getNewValOperand();
1103
1104 BasicBlock *BB = CI->getParent();
1105 Function *F = BB->getParent();
1106 ReplacementIRBuilder Builder(CI, *DL);
1107 LLVMContext &Ctx = Builder.getContext();
1108
1109 BasicBlock *EndBB =
1110 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1111 auto FailureBB =
1112 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1113 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1114
1115 // The split call above "helpfully" added a branch at the end of BB
1116 // (to the wrong place).
1117 std::prev(BB->end())->eraseFromParent();
1118 Builder.SetInsertPoint(BB);
1119
1120 PartwordMaskValues PMV =
1121 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1122 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1123
1124 // Shift the incoming values over, into the right location in the word.
1125 Value *NewVal_Shifted =
1126 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1127 Value *Cmp_Shifted =
1128 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1129
1130 // Load the entire current word, and mask into place the expected and new
1131 // values
1132 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1133 InitLoaded->setVolatile(CI->isVolatile());
1134 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1135 Builder.CreateBr(LoopBB);
1136
1137 // partword.cmpxchg.loop:
1138 Builder.SetInsertPoint(LoopBB);
1139 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1140 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1141
1142 // Mask/Or the expected and new values into place in the loaded word.
1143 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1144 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1145 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1146 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1147 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1148 NewCI->setVolatile(CI->isVolatile());
1149 // When we're building a strong cmpxchg, we need a loop, so you
1150 // might think we could use a weak cmpxchg inside. But, using strong
1151 // allows the below comparison for ShouldContinue, and we're
1152 // expecting the underlying cmpxchg to be a machine instruction,
1153 // which is strong anyways.
1154 NewCI->setWeak(CI->isWeak());
1155
1156 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1157 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1158
1159 if (CI->isWeak())
1160 Builder.CreateBr(EndBB);
1161 else
1162 Builder.CreateCondBr(Success, EndBB, FailureBB);
1163
1164 // partword.cmpxchg.failure:
1165 Builder.SetInsertPoint(FailureBB);
1166 // Upon failure, verify that the masked-out part of the loaded value
1167 // has been modified. If it didn't, abort the cmpxchg, since the
1168 // masked-in part must've.
1169 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1170 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1171 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1172
1173 // Add the second value to the phi from above
1174 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1175
1176 // partword.cmpxchg.end:
1177 Builder.SetInsertPoint(CI);
1178
1179 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1180 Value *Res = PoisonValue::get(CI->getType());
1181 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1182 Res = Builder.CreateInsertValue(Res, Success, 1);
1183
1184 CI->replaceAllUsesWith(Res);
1185 CI->eraseFromParent();
1186 return true;
1187}
1188
1189void AtomicExpandImpl::expandAtomicOpToLLSC(
1190 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1191 AtomicOrdering MemOpOrder,
1192 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1193 ReplacementIRBuilder Builder(I, *DL);
1194 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1195 MemOpOrder, PerformOp);
1196
1197 I->replaceAllUsesWith(Loaded);
1198 I->eraseFromParent();
1199}
1200
1201void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1202 ReplacementIRBuilder Builder(AI, *DL);
1203
1204 PartwordMaskValues PMV =
1205 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1206 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1207
1208 // The value operand must be sign-extended for signed min/max so that the
1209 // target's signed comparison instructions can be used. Otherwise, just
1210 // zero-ext.
1211 Instruction::CastOps CastOp = Instruction::ZExt;
1212 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1213 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1214 CastOp = Instruction::SExt;
1215
1216 Value *ValOperand_Shifted = Builder.CreateShl(
1217 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1218 PMV.ShiftAmt, "ValOperand_Shifted");
1219 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1220 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1221 AI->getOrdering());
1222 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1223 AI->replaceAllUsesWith(FinalOldResult);
1224 AI->eraseFromParent();
1225}
1226
1227void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1228 AtomicCmpXchgInst *CI) {
1229 ReplacementIRBuilder Builder(CI, *DL);
1230
1231 PartwordMaskValues PMV = createMaskInstrs(
1232 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1233 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1234
1235 Value *CmpVal_Shifted = Builder.CreateShl(
1236 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1237 "CmpVal_Shifted");
1238 Value *NewVal_Shifted = Builder.CreateShl(
1239 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1240 "NewVal_Shifted");
1241 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1242 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1243 CI->getMergedOrdering());
1244 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1245 Value *Res = PoisonValue::get(CI->getType());
1246 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1247 Value *Success = Builder.CreateICmpEQ(
1248 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1249 Res = Builder.CreateInsertValue(Res, Success, 1);
1250
1251 CI->replaceAllUsesWith(Res);
1252 CI->eraseFromParent();
1253}
1254
1255Value *AtomicExpandImpl::insertRMWLLSCLoop(
1256 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1257 AtomicOrdering MemOpOrder,
1258 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1259 LLVMContext &Ctx = Builder.getContext();
1260 BasicBlock *BB = Builder.GetInsertBlock();
1261 Function *F = BB->getParent();
1262
1263 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1264 "Expected at least natural alignment at this point.");
1265
1266 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1267 //
1268 // The standard expansion we produce is:
1269 // [...]
1270 // atomicrmw.start:
1271 // %loaded = @load.linked(%addr)
1272 // %new = some_op iN %loaded, %incr
1273 // %stored = @store_conditional(%new, %addr)
1274 // %try_again = icmp i32 ne %stored, 0
1275 // br i1 %try_again, label %loop, label %atomicrmw.end
1276 // atomicrmw.end:
1277 // [...]
1278 BasicBlock *ExitBB =
1279 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1280 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1281
1282 // The split call above "helpfully" added a branch at the end of BB (to the
1283 // wrong place).
1284 std::prev(BB->end())->eraseFromParent();
1285 Builder.SetInsertPoint(BB);
1286 Builder.CreateBr(LoopBB);
1287
1288 // Start the main loop block now that we've taken care of the preliminaries.
1289 Builder.SetInsertPoint(LoopBB);
1290 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1291
1292 Value *NewVal = PerformOp(Builder, Loaded);
1293
1294 Value *StoreSuccess =
1295 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1296 Value *TryAgain = Builder.CreateICmpNE(
1297 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1298
1299 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1300
1301 // Atomic RMW expands to a Load-linked / Store-Conditional loop. Because it is
1302 // hard to predict precise branch weights, we mark the branch as "unknown"
1303 // (50/50) to prevent misleading optimizations.
1305
1306 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1307 return Loaded;
1308}
1309
1310/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1311/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1312/// IR. As a migration step, we convert back to what used to be the standard
1313/// way to represent a pointer cmpxchg so that we can update backends one by
1314/// one.
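///
/// Illustrative sketch, assuming 64-bit pointers (value names invented for the
/// example):
///   %r = cmpxchg ptr %p, ptr %cmp, ptr %new acq_rel monotonic
/// becomes
///   %cmp.i = ptrtoint ptr %cmp to i64
///   %new.i = ptrtoint ptr %new to i64
///   %r.i   = cmpxchg ptr %p, i64 %cmp.i, i64 %new.i acq_rel monotonic
///   %old.i = extractvalue { i64, i1 } %r.i, 0
///   %old   = inttoptr i64 %old.i to ptr
/// with the original { ptr, i1 } result rebuilt from %old and the success bit.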
1315AtomicCmpXchgInst *
1316AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1317 auto *M = CI->getModule();
1318 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1319 M->getDataLayout());
1320
1321 ReplacementIRBuilder Builder(CI, *DL);
1322
1323 Value *Addr = CI->getPointerOperand();
1324
1325 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1326 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1327
1328 auto *NewCI = Builder.CreateAtomicCmpXchg(
1329 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1330 CI->getFailureOrdering(), CI->getSyncScopeID());
1331 NewCI->setVolatile(CI->isVolatile());
1332 NewCI->setWeak(CI->isWeak());
1333 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1334
1335 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1336 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1337
1338 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1339
1340 Value *Res = PoisonValue::get(CI->getType());
1341 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1342 Res = Builder.CreateInsertValue(Res, Succ, 1);
1343
1344 CI->replaceAllUsesWith(Res);
1345 CI->eraseFromParent();
1346 return NewCI;
1347}
1348
1349bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1350 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1351 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1352 Value *Addr = CI->getPointerOperand();
1353 BasicBlock *BB = CI->getParent();
1354 Function *F = BB->getParent();
1355 LLVMContext &Ctx = F->getContext();
1356 // If shouldInsertFencesForAtomic() returns true, then the target does not
1357 // want to deal with memory orders, and emitLeading/TrailingFence should take
1358 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1359 // should preserve the ordering.
1360 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1361 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1362 ? AtomicOrdering::Monotonic
1363 : CI->getMergedOrdering();
1364
1365 // In implementations which use a barrier to achieve release semantics, we can
1366 // delay emitting this barrier until we know a store is actually going to be
1367 // attempted. The cost of this delay is that we need 2 copies of the block
1368 // emitting the load-linked, affecting code size.
1369 //
1370 // Ideally, this logic would be unconditional except for the minsize check
1371 // since in other cases the extra blocks naturally collapse down to the
1372 // minimal loop. Unfortunately, this puts too much stress on later
1373 // optimisations so we avoid emitting the extra logic in those cases too.
1374 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1375 SuccessOrder != AtomicOrdering::Monotonic &&
1376 SuccessOrder != AtomicOrdering::Acquire &&
1377 !F->hasMinSize();
1378
1379 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1380 // do it even on minsize.
1381 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1382
1383 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1384 //
1385 // The full expansion we produce is:
1386 // [...]
1387 // %aligned.addr = ...
1388 // cmpxchg.start:
1389 // %unreleasedload = @load.linked(%aligned.addr)
1390 // %unreleasedload.extract = extract value from %unreleasedload
1391 // %should_store = icmp eq %unreleasedload.extract, %desired
1392 // br i1 %should_store, label %cmpxchg.releasingstore,
1393 // label %cmpxchg.nostore
1394 // cmpxchg.releasingstore:
1395 // fence?
1396 // br label cmpxchg.trystore
1397 // cmpxchg.trystore:
1398 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1399 // [%releasedload, %cmpxchg.releasedload]
1400 // %updated.new = insert %new into %loaded.trystore
1401 // %stored = @store_conditional(%updated.new, %aligned.addr)
1402 // %success = icmp eq i32 %stored, 0
1403 // br i1 %success, label %cmpxchg.success,
1404 // label %cmpxchg.releasedload/%cmpxchg.failure
1405 // cmpxchg.releasedload:
1406 // %releasedload = @load.linked(%aligned.addr)
1407 // %releasedload.extract = extract value from %releasedload
1408 // %should_store = icmp eq %releasedload.extract, %desired
1409 // br i1 %should_store, label %cmpxchg.trystore,
1410 // label %cmpxchg.failure
1411 // cmpxchg.success:
1412 // fence?
1413 // br label %cmpxchg.end
1414 // cmpxchg.nostore:
1415 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1416 // [%releasedload,
1417 // %cmpxchg.releasedload/%cmpxchg.trystore]
1418 // @load_linked_fail_balance()?
1419 // br label %cmpxchg.failure
1420 // cmpxchg.failure:
1421 // fence?
1422 // br label %cmpxchg.end
1423 // cmpxchg.end:
1424 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1425 // [%loaded.trystore, %cmpxchg.trystore]
1426 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1427 // %loaded = extract value from %loaded.exit
1428 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1429 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1430 // [...]
1431 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1432 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1433 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1434 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1435 auto ReleasedLoadBB =
1436 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1437 auto TryStoreBB =
1438 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1439 auto ReleasingStoreBB =
1440 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1441 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1442
1443 ReplacementIRBuilder Builder(CI, *DL);
1444
1445 // The split call above "helpfully" added a branch at the end of BB (to the
1446 // wrong place), but we might want a fence too. It's easiest to just remove
1447 // the branch entirely.
1448 std::prev(BB->end())->eraseFromParent();
1449 Builder.SetInsertPoint(BB);
1450 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1451 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1452
1453 PartwordMaskValues PMV =
1454 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1455 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1456 Builder.CreateBr(StartBB);
1457
1458 // Start the main loop block now that we've taken care of the preliminaries.
1459 Builder.SetInsertPoint(StartBB);
1460 Value *UnreleasedLoad =
1461 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1462 Value *UnreleasedLoadExtract =
1463 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1464 Value *ShouldStore = Builder.CreateICmpEQ(
1465 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1466
1467 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1468 // jump straight past that fence instruction (if it exists).
1469 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1470 MDBuilder(F->getContext()).createLikelyBranchWeights());
1471
1472 Builder.SetInsertPoint(ReleasingStoreBB);
1473 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1474 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1475 Builder.CreateBr(TryStoreBB);
1476
1477 Builder.SetInsertPoint(TryStoreBB);
1478 PHINode *LoadedTryStore =
1479 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1480 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1481 Value *NewValueInsert =
1482 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1483 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1484 PMV.AlignedAddr, MemOpOrder);
1485 StoreSuccess = Builder.CreateICmpEQ(
1486 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1487 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1488 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1489 CI->isWeak() ? FailureBB : RetryBB,
1490 MDBuilder(F->getContext()).createLikelyBranchWeights());
1491
1492 Builder.SetInsertPoint(ReleasedLoadBB);
1493 Value *SecondLoad;
1494 if (HasReleasedLoadBB) {
1495 SecondLoad =
1496 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1497 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1498 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1499 CI->getCompareOperand(), "should_store");
1500
1501 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1502 // jump straight past that fence instruction (if it exists).
1503 Builder.CreateCondBr(
1504 ShouldStore, TryStoreBB, NoStoreBB,
1505 MDBuilder(F->getContext()).createLikelyBranchWeights());
1506 // Update PHI node in TryStoreBB.
1507 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1508 } else
1509 Builder.CreateUnreachable();
1510
1511 // Make sure later instructions don't get reordered with a fence if
1512 // necessary.
1513 Builder.SetInsertPoint(SuccessBB);
1514 if (ShouldInsertFencesForAtomic ||
1515 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1516 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1517 Builder.CreateBr(ExitBB);
1518
1519 Builder.SetInsertPoint(NoStoreBB);
1520 PHINode *LoadedNoStore =
1521 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1522 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1523 if (HasReleasedLoadBB)
1524 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1525
1526 // In the failing case, where we don't execute the store-conditional, the
1527 // target might want to balance out the load-linked with a dedicated
1528 // instruction (e.g., on ARM, clearing the exclusive monitor).
1529 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1530 Builder.CreateBr(FailureBB);
1531
1532 Builder.SetInsertPoint(FailureBB);
1533 PHINode *LoadedFailure =
1534 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1535 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1536 if (CI->isWeak())
1537 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1538 if (ShouldInsertFencesForAtomic)
1539 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1540 Builder.CreateBr(ExitBB);
1541
1542 // Finally, we have control-flow based knowledge of whether the cmpxchg
1543 // succeeded or not. We expose this to later passes by converting any
1544 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1545 // PHI.
1546 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1547 PHINode *LoadedExit =
1548 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1549 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1550 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1551 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1552 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1553 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1554
1555 // This is the "exit value" from the cmpxchg expansion. It may be of
1556 // a type wider than the one in the cmpxchg instruction.
1557 Value *LoadedFull = LoadedExit;
1558
1559 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1560 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1561
1562 // Look for any users of the cmpxchg that are just comparing the loaded value
1563 // against the desired one, and replace them with the CFG-derived version.
1564 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1565 for (auto *User : CI->users()) {
1566 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1567 if (!EV)
1568 continue;
1569
1570 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1571 "weird extraction from { iN, i1 }");
1572
1573 if (EV->getIndices()[0] == 0)
1574 EV->replaceAllUsesWith(Loaded);
1575 else
1576 EV->replaceAllUsesWith(Success);
1577
1578 PrunedInsts.push_back(EV);
1579 }
1580
1581 // We can remove the instructions now that we're no longer iterating through them.
1582 for (auto *EV : PrunedInsts)
1583 EV->eraseFromParent();
1584
1585 if (!CI->use_empty()) {
1586 // Some use of the full struct return that we don't understand has happened,
1587 // so we've got to reconstruct it properly.
1588 Value *Res;
1589 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1590 Res = Builder.CreateInsertValue(Res, Success, 1);
1591
1592 CI->replaceAllUsesWith(Res);
1593 }
1594
1595 CI->eraseFromParent();
1596 return true;
1597}
1598
1599bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1600 // TODO: Add floating point support.
1601 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1602 if (!C)
1603 return false;
1604
1605 switch (RMWI->getOperation()) {
1606 case AtomicRMWInst::Add:
1607 case AtomicRMWInst::Sub:
1608 case AtomicRMWInst::Or:
1609 case AtomicRMWInst::Xor:
1610 return C->isZero();
1611 case AtomicRMWInst::And:
1612 return C->isMinusOne();
1613 case AtomicRMWInst::Min:
1614 return C->isMaxValue(true);
1615 case AtomicRMWInst::Max:
1616 return C->isMinValue(true);
1617 case AtomicRMWInst::UMin:
1618 return C->isMaxValue(false);
1619 case AtomicRMWInst::UMax:
1620 return C->isMinValue(false);
1621 default:
1622 return false;
1623 }
1624}
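// Illustrative sketch (not part of the pass): constructing an atomicrmw that
// the predicate above treats as idempotent. The builder position, pointer
// operand and context used below are hypothetical.
//
//   IRBuilder<> B(InsertPt);
//   Value *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
//   AtomicRMWInst *RMW =
//       B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Zero, Align(4),
//                         AtomicOrdering::SequentiallyConsistent);
//   // "old + 0" never changes memory, so isIdempotentRMW(RMW) returns true.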
1625
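// If the target can lower an idempotent RMW into a fenced load (via
// lowerIdempotentRMWIntoFencedLoad), do so, then run the normal atomic-load
// expansion on the resulting load.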
1626bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1627 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1628 tryExpandAtomicLoad(ResultingLoad);
1629 return true;
1630 }
1631 return false;
1632}
1633
1634Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1635 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1636 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1637 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1638 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1639 LLVMContext &Ctx = Builder.getContext();
1640 BasicBlock *BB = Builder.GetInsertBlock();
1641 Function *F = BB->getParent();
1642
1643 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1644 //
1645 // The standard expansion we produce is:
1646 // [...]
1647 // %init_loaded = load atomic iN* %addr
1648 // br label %loop
1649 // loop:
1650 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1651 // %new = some_op iN %loaded, %incr
1652 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1653 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1654 // %success = extractvalue { iN, i1 } %pair, 1
1655 // br i1 %success, label %atomicrmw.end, label %loop
1656 // atomicrmw.end:
1657 // [...]
1658 BasicBlock *ExitBB =
1659 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1660 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1661
1662 // The split call above "helpfully" added a branch at the end of BB (to the
1663 // wrong place), but we want a load. It's easiest to just remove
1664 // the branch entirely.
1665 std::prev(BB->end())->eraseFromParent();
1666 Builder.SetInsertPoint(BB);
1667 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1668 Builder.CreateBr(LoopBB);
1669
1670 // Start the main loop block now that we've taken care of the preliminaries.
1671 Builder.SetInsertPoint(LoopBB);
1672 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1673 Loaded->addIncoming(InitLoaded, BB);
1674
1675 Value *NewVal = PerformOp(Builder, Loaded);
1676
1677 Value *NewLoaded = nullptr;
1678 Value *Success = nullptr;
1679
1680 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1681 MemOpOrder == AtomicOrdering::Unordered
1682 ? AtomicOrdering::Monotonic
1683 : MemOpOrder,
1684 SSID, Success, NewLoaded, MetadataSrc);
1685 assert(Success && NewLoaded);
1686
1687 Loaded->addIncoming(NewLoaded, LoopBB);
1688
1689 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1690
1691 // Atomic RMW expands to a cmpxchg loop. Since precise branch weights
1692 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1693 // to prevent misleading optimizations.
1694 setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE);
1695
1696 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1697 return NewLoaded;
1698}
1699
1700bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1701 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1702 unsigned ValueSize = getAtomicOpSize(CI);
1703
1704 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1705 default:
1706 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1707 case TargetLoweringBase::AtomicExpansionKind::None:
1708 if (ValueSize < MinCASSize)
1709 return expandPartwordCmpXchg(CI);
1710 return false;
1711 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1712 return expandAtomicCmpXchg(CI);
1713 }
1714 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1715 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1716 return true;
1717 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1718 return lowerAtomicCmpXchgInst(CI);
1719 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1720 TLI->emitExpandAtomicCmpXchg(CI);
1721 return true;
1722 }
1723 }
1724}
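// Illustrative sketch (hypothetical target): a backend opts into the LL/SC
// cmpxchg expansion above by overriding the corresponding TargetLowering hook.
//
//   TargetLoweringBase::AtomicExpansionKind
//   MyTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const {
//     // Expand every cmpxchg into a load-linked/store-conditional loop.
//     return AtomicExpansionKind::LLSC;
//   }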
1725
1726// Note: This function is exposed externally by AtomicExpandUtils.h
1727 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1728 CreateCmpXchgInstFun CreateCmpXchg) {
1729 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1730 Builder.setIsFPConstrained(
1731 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1732
1733 // FIXME: If FP exceptions are observable, we should force them off for the
1734 // loop for the FP atomics.
1735 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1736 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1737 AI->getOrdering(), AI->getSyncScopeID(),
1738 [&](IRBuilderBase &Builder, Value *Loaded) {
1739 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1740 AI->getValOperand());
1741 },
1742 CreateCmpXchg, /*MetadataSrc=*/AI);
1743
1744 AI->replaceAllUsesWith(Loaded);
1745 AI->eraseFromParent();
1746 return true;
1747}
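// Illustrative usage (hypothetical caller): a backend can expand an oversized
// atomicrmw through this entry point by supplying its own CreateCmpXchgInstFun
// callback, which builds (or further lowers) the cmpxchg used by the loop.
//
//   expandAtomicRMWToCmpXchg(AI, MyCreateCmpXchgFn);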
1748
1749// In order to use one of the sized library calls such as
1750// __atomic_fetch_add_4, the alignment must be sufficient, the size
1751// must be one of the potentially-specialized sizes, and the value
1752// type must actually exist in C on the target (otherwise, the
1753// function wouldn't actually be defined.)
1754static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1755 const DataLayout &DL) {
1756 // TODO: "LargestSize" is an approximation for "largest type that
1757 // you can express in C". It seems to be the case that int128 is
1758 // supported on all 64-bit platforms, otherwise only up to 64-bit
1759 // integers are supported. If we get this wrong, then we'll try to
1760 // call a sized libcall that doesn't actually exist. There should
1761 // really be some more reliable way in LLVM of determining integer
1762 // sizes which are valid in the target's C ABI...
1763 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1764 return Alignment >= Size &&
1765 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1766 Size <= LargestSize;
1767}
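// For example, on a typical 64-bit target (largest legal integer >= 64 bits,
// so LargestSize is 16):
//   canUseSizedAtomicCall(4, Align(4), DL)   -> true  (__atomic_*_4)
//   canUseSizedAtomicCall(4, Align(2), DL)   -> false (underaligned)
//   canUseSizedAtomicCall(16, Align(16), DL) -> true  (__atomic_*_16)
//   canUseSizedAtomicCall(3, Align(4), DL)   -> false (no 3-byte specialization)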
1768
1769void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
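 // Slot 0 is the generic (size_t-based) __atomic_load; slots 1-5 are the
 // sized _1/_2/_4/_8/_16 specializations, selected by size in
 // expandAtomicOpToLibcall.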
1770 static const RTLIB::Libcall Libcalls[6] = {
1771 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1772 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1773 unsigned Size = getAtomicOpSize(I);
1774
1775 bool expanded = expandAtomicOpToLibcall(
1776 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1777 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1778 if (!expanded)
1779 handleFailure(*I, "unsupported atomic load");
1780}
1781
1782void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1783 static const RTLIB::Libcall Libcalls[6] = {
1784 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1785 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1786 unsigned Size = getAtomicOpSize(I);
1787
1788 bool expanded = expandAtomicOpToLibcall(
1789 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1790 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1791 if (!expanded)
1792 handleFailure(*I, "unsupported atomic store");
1793}
1794
1795void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1796 static const RTLIB::Libcall Libcalls[6] = {
1797 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1798 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1799 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1800 unsigned Size = getAtomicOpSize(I);
1801
1802 bool expanded = expandAtomicOpToLibcall(
1803 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1804 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1805 Libcalls);
1806 if (!expanded)
1807 handleFailure(*I, "unsupported cmpxchg");
1808}
1809
1810 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1811 static const RTLIB::Libcall LibcallsXchg[6] = {
1812 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1813 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1814 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
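 // Unlike exchange, the fetch_* operations have no generic (size_t-based)
 // libcall, so slot 0 is UNKNOWN_LIBCALL and unsupported sizes fall back to a
 // CAS-loop expansion in expandAtomicRMWToLibcall.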
1815 static const RTLIB::Libcall LibcallsAdd[6] = {
1816 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1817 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1818 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1819 static const RTLIB::Libcall LibcallsSub[6] = {
1820 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1821 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1822 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1823 static const RTLIB::Libcall LibcallsAnd[6] = {
1824 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1825 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1826 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1827 static const RTLIB::Libcall LibcallsOr[6] = {
1828 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1829 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1830 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1831 static const RTLIB::Libcall LibcallsXor[6] = {
1832 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1833 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1834 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1835 static const RTLIB::Libcall LibcallsNand[6] = {
1836 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1837 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1838 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1839
1840 switch (Op) {
1841 case AtomicRMWInst::BAD_BINOP:
1842 llvm_unreachable("Should not have BAD_BINOP.");
1843 case AtomicRMWInst::Xchg:
1844 return ArrayRef(LibcallsXchg);
1845 case AtomicRMWInst::Add:
1846 return ArrayRef(LibcallsAdd);
1847 case AtomicRMWInst::Sub:
1848 return ArrayRef(LibcallsSub);
1849 case AtomicRMWInst::And:
1850 return ArrayRef(LibcallsAnd);
1851 case AtomicRMWInst::Or:
1852 return ArrayRef(LibcallsOr);
1853 case AtomicRMWInst::Xor:
1854 return ArrayRef(LibcallsXor);
1855 case AtomicRMWInst::Nand:
1856 return ArrayRef(LibcallsNand);
1857 case AtomicRMWInst::Max:
1858 case AtomicRMWInst::Min:
1859 case AtomicRMWInst::UMax:
1860 case AtomicRMWInst::UMin:
1861 case AtomicRMWInst::FMax:
1862 case AtomicRMWInst::FMin:
1863 case AtomicRMWInst::FMaximum:
1864 case AtomicRMWInst::FMinimum:
1865 case AtomicRMWInst::FAdd:
1866 case AtomicRMWInst::FSub:
1867 case AtomicRMWInst::UIncWrap:
1868 case AtomicRMWInst::UDecWrap:
1869 case AtomicRMWInst::USubCond:
1870 case AtomicRMWInst::USubSat:
1871 // No atomic libcalls are available for these.
1872 return {};
1873 }
1874 llvm_unreachable("Unexpected AtomicRMW operation.");
1875}
1876
1877void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1878 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1879
1880 unsigned Size = getAtomicOpSize(I);
1881
1882 bool Success = false;
1883 if (!Libcalls.empty())
1884 Success = expandAtomicOpToLibcall(
1885 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1886 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1887
1888 // The expansion failed: either there were no libcalls at all for
1889 // the operation (min/max), or there were only size-specialized
1890 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1891 // CAS libcall, via a CAS loop, instead.
1892 if (!Success) {
1893 expandAtomicRMWToCmpXchg(
1894 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1895 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1896 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1897 Instruction *MetadataSrc) {
1898 // Create the CAS instruction normally...
1899 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1900 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1901 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1902 if (MetadataSrc)
1903 copyMetadataForAtomic(*Pair, *MetadataSrc);
1904
1905 Success = Builder.CreateExtractValue(Pair, 1, "success");
1906 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1907
1908 // ...and then expand the CAS into a libcall.
1909 expandAtomicCASToLibcall(Pair);
1910 });
1911 }
1912}
1913
1914// A helper routine for the above expandAtomic*ToLibcall functions.
1915//
1916// 'Libcalls' contains an array of enum values for the particular
1917// ATOMIC libcalls to be emitted. All of the other arguments besides
1918// 'I' are extracted from the Instruction subclass by the
1919// caller. Depending on the particular call, some will be null.
1920bool AtomicExpandImpl::expandAtomicOpToLibcall(
1921 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1922 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1923 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1924 assert(Libcalls.size() == 6);
1925
1926 LLVMContext &Ctx = I->getContext();
1927 Module *M = I->getModule();
1928 const DataLayout &DL = M->getDataLayout();
1929 IRBuilder<> Builder(I);
1930 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1931
1932 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1933 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1934
1935 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1936
1937 // TODO: the "order" argument type is "int", not int32. So
1938 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1939 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1940 Constant *OrderingVal =
1941 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1942 Constant *Ordering2Val = nullptr;
1943 if (CASExpected) {
1944 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1945 Ordering2Val =
1946 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1947 }
1948 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1949
1950 RTLIB::Libcall RTLibType;
1951 if (UseSizedLibcall) {
1952 switch (Size) {
1953 case 1:
1954 RTLibType = Libcalls[1];
1955 break;
1956 case 2:
1957 RTLibType = Libcalls[2];
1958 break;
1959 case 4:
1960 RTLibType = Libcalls[3];
1961 break;
1962 case 8:
1963 RTLibType = Libcalls[4];
1964 break;
1965 case 16:
1966 RTLibType = Libcalls[5];
1967 break;
1968 }
1969 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1970 RTLibType = Libcalls[0];
1971 } else {
1972 // Can't use sized function, and there's no generic for this
1973 // operation, so give up.
1974 return false;
1975 }
1976
1977 if (!TLI->getLibcallName(RTLibType)) {
1978 // This target does not implement the requested atomic libcall so give up.
1979 return false;
1980 }
1981
1982 // Build up the function call. There are two kinds. First, the sized
1983 // variants. These calls are going to be one of the following (with
1984 // N=1,2,4,8,16):
1985 // iN __atomic_load_N(iN *ptr, int ordering)
1986 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1987 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1988 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1989 // int success_order, int failure_order)
1990 //
1991 // Note that these functions can be used for non-integer atomic
1992 // operations, the values just need to be bitcast to integers on the
1993 // way in and out.
1994 //
1995 // And, then, the generic variants. They look like the following:
1996 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1997 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1998 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1999 // int ordering)
2000 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2001 // void *desired, int success_order,
2002 // int failure_order)
2003 //
2004 // The different signatures are built up depending on the
2005 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2006 // variables.
2007
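 // The cmpxchg 'expected' operand is always passed indirectly; for the generic
 // (non-sized) calls, the value operand and the result also go through
 // entry-block allocas, bracketed by lifetime.start/lifetime.end markers below.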
2008 AllocaInst *AllocaCASExpected = nullptr;
2009 AllocaInst *AllocaValue = nullptr;
2010 AllocaInst *AllocaResult = nullptr;
2011
2012 Type *ResultTy;
2013 SmallVector<Value *, 6> Args;
2014 AttributeList Attr;
2015
2016 // 'size' argument.
2017 if (!UseSizedLibcall) {
2018 // Note, getIntPtrType is assumed equivalent to size_t.
2019 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2020 }
2021
2022 // 'ptr' argument.
2023 // note: This assumes all address spaces share a common libfunc
2024 // implementation and that addresses are convertible. For systems without
2025 // that property, we'd need to extend this mechanism to support AS-specific
2026 // families of atomic intrinsics.
2027 Value *PtrVal = PointerOperand;
2028 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2029 Args.push_back(PtrVal);
2030
2031 // 'expected' argument, if present.
2032 if (CASExpected) {
2033 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2034 AllocaCASExpected->setAlignment(AllocaAlignment);
2035 Builder.CreateLifetimeStart(AllocaCASExpected);
2036 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2037 Args.push_back(AllocaCASExpected);
2038 }
2039
2040 // 'val' argument ('desired' for cas), if present.
2041 if (ValueOperand) {
2042 if (UseSizedLibcall) {
2043 Value *IntValue =
2044 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2045 Args.push_back(IntValue);
2046 } else {
2047 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2048 AllocaValue->setAlignment(AllocaAlignment);
2049 Builder.CreateLifetimeStart(AllocaValue);
2050 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2051 Args.push_back(AllocaValue);
2052 }
2053 }
2054
2055 // 'ret' argument.
2056 if (!CASExpected && HasResult && !UseSizedLibcall) {
2057 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2058 AllocaResult->setAlignment(AllocaAlignment);
2059 Builder.CreateLifetimeStart(AllocaResult);
2060 Args.push_back(AllocaResult);
2061 }
2062
2063 // 'ordering' ('success_order' for cas) argument.
2064 Args.push_back(OrderingVal);
2065
2066 // 'failure_order' argument, if present.
2067 if (Ordering2Val)
2068 Args.push_back(Ordering2Val);
2069
2070 // Now, the return type.
2071 if (CASExpected) {
2072 ResultTy = Type::getInt1Ty(Ctx);
2073 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2074 } else if (HasResult && UseSizedLibcall)
2075 ResultTy = SizedIntTy;
2076 else
2077 ResultTy = Type::getVoidTy(Ctx);
2078
2079 // Done with setting up arguments and return types, create the call:
2080 SmallVector<Type *, 6> ArgTys;
2081 for (Value *Arg : Args)
2082 ArgTys.push_back(Arg->getType());
2083 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2084 FunctionCallee LibcallFn =
2085 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
2086 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2087 Call->setAttributes(Attr);
2088 Value *Result = Call;
2089
2090 // And then, extract the results...
2091 if (ValueOperand && !UseSizedLibcall)
2092 Builder.CreateLifetimeEnd(AllocaValue);
2093
2094 if (CASExpected) {
2095 // The final result from the CAS is {load of 'expected' alloca, bool result
2096 // from call}
2097 Type *FinalResultTy = I->getType();
2098 Value *V = PoisonValue::get(FinalResultTy);
2099 Value *ExpectedOut = Builder.CreateAlignedLoad(
2100 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2101 Builder.CreateLifetimeEnd(AllocaCASExpected);
2102 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2103 V = Builder.CreateInsertValue(V, Result, 1);
2104 I->replaceAllUsesWith(V);
2105 } else if (HasResult) {
2106 Value *V;
2107 if (UseSizedLibcall)
2108 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2109 else {
2110 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2111 AllocaAlignment);
2112 Builder.CreateLifetimeEnd(AllocaResult);
2113 }
2114 I->replaceAllUsesWith(V);
2115 }
2116 I->eraseFromParent();
2117 return true;
2118}