70#define DEBUG_TYPE "openmp-ir-builder"
77 cl::desc(
"Use optimistic attributes describing "
78 "'as-if' properties of runtime calls."),
82 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
83 cl::desc(
"Factor for the unroll threshold to account for code "
84 "simplifications still taking place"),
88 "openmp-ir-builder-use-default-max-threads",
cl::Hidden,
99 if (!IP1.isSet() || !IP2.isSet())
101 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
// Classify the schedule type with its monotonicity modifier bits masked off,
// so only the ordering/base-algorithm part of the value takes part in the
// match. Every label below is one accepted combination; they all share a
// single fall-through target (not visible in this excerpt).
106 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
// Unordered variants.
107 case OMPScheduleType::UnorderedStaticChunked:
108 case OMPScheduleType::UnorderedStatic:
109 case OMPScheduleType::UnorderedDynamicChunked:
110 case OMPScheduleType::UnorderedGuidedChunked:
111 case OMPScheduleType::UnorderedRuntime:
112 case OMPScheduleType::UnorderedAuto:
113 case OMPScheduleType::UnorderedTrapezoidal:
114 case OMPScheduleType::UnorderedGreedy:
115 case OMPScheduleType::UnorderedBalanced:
116 case OMPScheduleType::UnorderedGuidedIterativeChunked:
117 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
118 case OMPScheduleType::UnorderedSteal:
119 case OMPScheduleType::UnorderedStaticBalancedChunked:
120 case OMPScheduleType::UnorderedGuidedSimd:
121 case OMPScheduleType::UnorderedRuntimeSimd:
// Ordered variants.
122 case OMPScheduleType::OrderedStaticChunked:
123 case OMPScheduleType::OrderedStatic:
124 case OMPScheduleType::OrderedDynamicChunked:
125 case OMPScheduleType::OrderedGuidedChunked:
126 case OMPScheduleType::OrderedRuntime:
127 case OMPScheduleType::OrderedAuto:
// NOTE(review): the "Orderd" spelling presumably mirrors the enumerator's
// declaration elsewhere; confirm before "correcting" it here.
128 case OMPScheduleType::OrderdTrapezoidal:
// Nomerge unordered variants.
129 case OMPScheduleType::NomergeUnorderedStaticChunked:
130 case OMPScheduleType::NomergeUnorderedStatic:
131 case OMPScheduleType::NomergeUnorderedDynamicChunked:
132 case OMPScheduleType::NomergeUnorderedGuidedChunked:
133 case OMPScheduleType::NomergeUnorderedRuntime:
134 case OMPScheduleType::NomergeUnorderedAuto:
135 case OMPScheduleType::NomergeUnorderedTrapezoidal:
136 case OMPScheduleType::NomergeUnorderedGreedy:
137 case OMPScheduleType::NomergeUnorderedBalanced:
138 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
139 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
140 case OMPScheduleType::NomergeUnorderedSteal:
// Nomerge ordered variants.
141 case OMPScheduleType::NomergeOrderedStaticChunked:
142 case OMPScheduleType::NomergeOrderedStatic:
143 case OMPScheduleType::NomergeOrderedDynamicChunked:
144 case OMPScheduleType::NomergeOrderedGuidedChunked:
145 case OMPScheduleType::NomergeOrderedRuntime:
146 case OMPScheduleType::NomergeOrderedAuto:
147 case OMPScheduleType::NomergeOrderedTrapezoidal:
// Distribute variants.
148 case OMPScheduleType::OrderedDistributeChunked:
149 case OMPScheduleType::OrderedDistribute:
157 SchedType & OMPScheduleType::MonotonicityMask;
158 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
170 Builder.restoreIP(IP);
178 return T.isAMDGPU() ||
T.isNVPTX() ||
T.isSPIRV();
184 Kernel->getFnAttribute(
"target-features").getValueAsString();
185 if (Features.
count(
"+wavefrontsize64"))
200 bool HasSimdModifier,
bool HasDistScheduleChunks) {
202 switch (ClauseKind) {
203 case OMP_SCHEDULE_Default:
204 case OMP_SCHEDULE_Static:
205 return HasChunks ? OMPScheduleType::BaseStaticChunked
206 : OMPScheduleType::BaseStatic;
207 case OMP_SCHEDULE_Dynamic:
208 return OMPScheduleType::BaseDynamicChunked;
209 case OMP_SCHEDULE_Guided:
210 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
211 : OMPScheduleType::BaseGuidedChunked;
212 case OMP_SCHEDULE_Auto:
214 case OMP_SCHEDULE_Runtime:
215 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
216 : OMPScheduleType::BaseRuntime;
217 case OMP_SCHEDULE_Distribute:
218 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
219 : OMPScheduleType::BaseDistribute;
227 bool HasOrderedClause) {
228 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
229 OMPScheduleType::None &&
230 "Must not have ordering nor monotonicity flags already set");
233 ? OMPScheduleType::ModifierOrdered
234 : OMPScheduleType::ModifierUnordered;
235 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
238 if (OrderingScheduleType ==
239 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
240 return OMPScheduleType::OrderedGuidedChunked;
241 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
242 OMPScheduleType::ModifierOrdered))
243 return OMPScheduleType::OrderedRuntime;
245 return OrderingScheduleType;
251 bool HasSimdModifier,
bool HasMonotonic,
252 bool HasNonmonotonic,
bool HasOrderedClause) {
253 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
254 OMPScheduleType::None &&
255 "Must not have monotonicity flags already set");
256 assert((!HasMonotonic || !HasNonmonotonic) &&
257 "Monotonic and Nonmonotonic are contradicting each other");
260 return ScheduleType | OMPScheduleType::ModifierMonotonic;
261 }
else if (HasNonmonotonic) {
262 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
272 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
273 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
279 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
287 bool HasSimdModifier,
bool HasMonotonicModifier,
288 bool HasNonmonotonicModifier,
bool HasOrderedClause,
289 bool HasDistScheduleChunks) {
291 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
295 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
296 HasNonmonotonicModifier, HasOrderedClause);
304static std::optional<omp::OMPTgtExecModeFlags>
309 if (
Call->getCalledFunction()->getName() ==
"__kmpc_target_init") {
310 TargetInitCall =
Call;
335 std::optional<omp::OMPTgtExecModeFlags> ExecMode =
347 if (
Instruction *Term = Source->getTerminatorOrNull()) {
356 NewBr->setDebugLoc(
DL);
361 assert(New->getFirstInsertionPt() == New->begin() &&
362 "Target BB must not have PHI nodes");
378 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
382 NewBr->setDebugLoc(
DL);
394 Builder.SetInsertPoint(Old);
398 Builder.SetCurrentDebugLocation(
DebugLoc);
408 New->replaceSuccessorsPhiUsesWith(Old, New);
417 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
419 Builder.SetInsertPoint(Builder.GetInsertBlock());
422 Builder.SetCurrentDebugLocation(
DebugLoc);
431 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
433 Builder.SetInsertPoint(Builder.GetInsertBlock());
436 Builder.SetCurrentDebugLocation(
DebugLoc);
453 const Twine &Name =
"",
bool AsPtr =
true,
454 bool Is64Bit =
false) {
455 Builder.restoreIP(OuterAllocaIP);
459 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
463 FakeVal = FakeValAddr;
465 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
470 Builder.restoreIP(InnerAllocaIP);
473 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
476 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
// Bit flags recorded in the builder configuration's RequiresFlags field.
// Apart from the zero "undefined" value, every enumerator is a distinct single
// bit, so the configuration code in this file can OR flags in (|=), clear them
// (&= ~flag) and test them (&) independently.
//  - OMP_REQ_UNDEFINED is the value RequiresFlags is initialized with by the
//    configuration constructors in this file.
//  - OMP_REQ_NONE is what the flags query in this file yields, cast to
//    int64_t, on the alternative arm of a conditional whose condition is not
//    visible in this excerpt (presumably "no flags recorded" -- confirm).
//  - The remaining four flags are set from the constructor arguments of the
//    same name (HasRequiresReverseOffload, HasRequiresUnifiedAddress,
//    HasRequiresUnifiedSharedMemory, HasRequiresDynamicAllocators).
489enum OpenMPOffloadingRequiresDirFlags {
491 OMP_REQ_UNDEFINED = 0x000,
493 OMP_REQ_NONE = 0x001,
495 OMP_REQ_REVERSE_OFFLOAD = 0x002,
497 OMP_REQ_UNIFIED_ADDRESS = 0x004,
499 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
501 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
508 DominatorTree *DT =
nullptr,
bool AggregateArgs =
false,
509 BlockFrequencyInfo *BFI =
nullptr,
510 BranchProbabilityInfo *BPI =
nullptr,
511 AssumptionCache *AC =
nullptr,
bool AllowVarArgs =
false,
512 bool AllowAlloca =
false,
513 BasicBlock *AllocationBlock =
nullptr,
515 std::string Suffix =
"",
bool ArgsInZeroAddressSpace =
false)
516 : CodeExtractor(BBs, DT, AggregateArgs, BFI, BPI, AC, AllowVarArgs,
517 AllowAlloca, AllocationBlock, DeallocationBlocks, Suffix,
518 ArgsInZeroAddressSpace),
519 OMPBuilder(OMPBuilder) {}
521 virtual ~OMPCodeExtractor() =
default;
524 OpenMPIRBuilder &OMPBuilder;
527class DeviceSharedMemCodeExtractor :
public OMPCodeExtractor {
529 using OMPCodeExtractor::OMPCodeExtractor;
530 virtual ~DeviceSharedMemCodeExtractor() =
default;
534 allocateVar(IRBuilder<>::InsertPoint AllocaIP,
Type *VarType,
535 const Twine &Name = Twine(
""),
536 AddrSpaceCastInst **CastedAlloc =
nullptr)
override {
537 return OMPBuilder.createOMPAllocShared(AllocaIP, VarType, Name);
540 virtual Instruction *deallocateVar(IRBuilder<>::InsertPoint DeallocIP,
542 return OMPBuilder.createOMPFreeShared(DeallocIP, Var, VarType);
549 OpenMPIRBuilder &OMPBuilder;
551 DeviceSharedMemOutlineInfo(OpenMPIRBuilder &OMPBuilder)
552 : OMPBuilder(OMPBuilder) {}
553 virtual ~DeviceSharedMemOutlineInfo() =
default;
555 virtual std::unique_ptr<CodeExtractor>
557 bool ArgsInZeroAddressSpace,
558 Twine Suffix = Twine(
""))
override;
564 : RequiresFlags(OMP_REQ_UNDEFINED) {}
568 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
569 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
572 RequiresFlags(OMP_REQ_UNDEFINED) {
573 if (HasRequiresReverseOffload)
574 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
575 if (HasRequiresUnifiedAddress)
576 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
577 if (HasRequiresUnifiedSharedMemory)
578 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
579 if (HasRequiresDynamicAllocators)
580 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
584 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
588 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
592 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
596 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
601 :
static_cast<int64_t
>(OMP_REQ_NONE);
606 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
608 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
613 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
615 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
620 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
622 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
627 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
629 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
642 constexpr size_t MaxDim = 3;
647 Value *DynCGroupMemFallbackFlag =
649 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
652 StrictFlag =
Builder.CreateShl(StrictFlag, 6);
654 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
655 Flags =
Builder.CreateOr(Flags, StrictFlag);
661 Value *NumThreads3D =
692 auto FnAttrs = Attrs.getFnAttrs();
693 auto RetAttrs = Attrs.getRetAttrs();
695 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
700 bool Param =
true) ->
void {
701 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
702 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
703 if (HasSignExt || HasZeroExt) {
704 assert(AS.getNumAttributes() == 1 &&
705 "Currently not handling extension attr combined with others.");
707 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
710 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
717#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
718#include "llvm/Frontend/OpenMP/OMPKinds.def"
722#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
724 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
725 addAttrSet(RetAttrs, RetAttrSet, false); \
726 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
727 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
728 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
730#include "llvm/Frontend/OpenMP/OMPKinds.def"
744#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
746 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
748 Fn = M.getFunction(Str); \
750#include "llvm/Frontend/OpenMP/OMPKinds.def"
756#define OMP_RTL(Enum, Str, ...) \
758 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
760#include "llvm/Frontend/OpenMP/OMPKinds.def"
764 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
774 LLVMContext::MD_callback,
776 2, {-1, -1},
true)}));
789 assert(Fn &&
"Failed to create OpenMP runtime function");
800 Builder.SetInsertPoint(FiniBB);
812 FiniBB = OtherFiniBB;
814 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
822 auto EndIt = FiniBB->end();
823 if (FiniBB->size() >= 1)
824 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
829 FiniBB->replaceAllUsesWith(OtherFiniBB);
830 FiniBB->eraseFromParent();
831 FiniBB = OtherFiniBB;
838 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
861 for (
auto Inst =
Block->getReverseIterator()->begin();
862 Inst !=
Block->getReverseIterator()->end();) {
891 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
912 DeferredOutlines.
push_back(std::move(OI));
916 ParallelRegionBlockSet.
clear();
918 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
928 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
929 std::unique_ptr<CodeExtractor> Extractor =
930 OI->createCodeExtractor(Blocks, ArgsInZeroAddressSpace,
".omp_par");
934 <<
" Exit: " << OI->ExitBB->getName() <<
"\n");
935 assert(Extractor->isEligible() &&
936 "Expected OpenMP outlining to be possible!");
938 for (
auto *V : OI->ExcludeArgsFromAggregate)
939 Extractor->excludeArgFromAggregate(V);
942 Extractor->extractCodeRegion(CEAC, OI->Inputs, OI->Outputs);
946 if (TargetCpuAttr.isStringAttribute())
949 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
950 if (TargetFeaturesAttr.isStringAttribute())
951 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
954 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
956 "OpenMP outlined functions should not return a value!");
961 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
968 assert(OI->EntryBB->getUniquePredecessor() == &ArtificialEntry);
975 "Expected instructions to add in the outlined region entry");
977 End = ArtificialEntry.
rend();
982 if (
I.isTerminator()) {
984 if (
Instruction *TI = OI->EntryBB->getTerminatorOrNull())
985 TI->adoptDbgRecords(&ArtificialEntry,
I.getIterator(),
false);
989 I.moveBeforePreserving(*OI->EntryBB,
990 OI->EntryBB->getFirstInsertionPt());
993 OI->EntryBB->moveBefore(&ArtificialEntry);
1000 if (OI->PostOutlineCB)
1001 OI->PostOutlineCB(*OutlinedFn);
1003 if (OI->FixUpNonEntryAllocas)
1035 errs() <<
"Error of kind: " << Kind
1036 <<
" when emitting offload entries and metadata during "
1037 "OMPIRBuilder finalization \n";
1043 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
1044 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
1045 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
1046 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
1063 ConstantInt::get(I32Ty,
Value), Name);
1076 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
1080 if (UsedArray.
empty())
1087 GV->setSection(
"llvm.metadata");
1093 auto *Int8Ty =
Builder.getInt8Ty();
1096 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1104 unsigned Reserve2Flags) {
1106 LocFlags |= OMP_IDENT_FLAG_KMPC;
1113 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1114 ConstantInt::get(Int32, Reserve2Flags),
1115 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1117 size_t SrcLocStrArgIdx = 4;
1118 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1122 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1129 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1130 if (
GV.getInitializer() == Initializer)
1135 M, OpenMPIRBuilder::Ident,
1138 M.getDataLayout().getDefaultGlobalsAddressSpace());
1150 SrcLocStrSize = LocStr.
size();
1159 if (
GV.isConstant() &&
GV.hasInitializer() &&
1160 GV.getInitializer() == Initializer)
1163 SrcLocStr =
Builder.CreateGlobalString(
1164 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1172 unsigned Line,
unsigned Column,
1178 Buffer.
append(FunctionName);
1180 Buffer.
append(std::to_string(Line));
1182 Buffer.
append(std::to_string(Column));
1190 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1201 !DIL->getFilename().empty() ? DIL->getFilename() :
M.getName();
1206 DIL->getColumn(), SrcLocStrSize);
1212 Loc.IP.getBlock()->getParent());
1218 "omp_global_thread_num");
1223 bool ForceSimpleCall,
bool CheckCancelFlag) {
1233 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1236 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1239 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1242 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1245 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1258 bool UseCancelBarrier =
1263 ? OMPRTL___kmpc_cancel_barrier
1264 : OMPRTL___kmpc_barrier),
1267 if (UseCancelBarrier && CheckCancelFlag)
1277 omp::Directive CanceledDirective) {
1282 auto *UI =
Builder.CreateUnreachable();
1290 Builder.SetInsertPoint(ElseTI);
1291 auto ElseIP =
Builder.saveIP();
1299 Builder.SetInsertPoint(ThenTI);
1301 Value *CancelKind =
nullptr;
1302 switch (CanceledDirective) {
1303#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1304 case DirectiveEnum: \
1305 CancelKind = Builder.getInt32(Value); \
1307#include "llvm/Frontend/OpenMP/OMPKinds.def"
1324 Builder.SetInsertPoint(UI->getParent());
1325 UI->eraseFromParent();
1332 omp::Directive CanceledDirective) {
1337 auto *UI =
Builder.CreateUnreachable();
1340 Value *CancelKind =
nullptr;
1341 switch (CanceledDirective) {
1342#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1343 case DirectiveEnum: \
1344 CancelKind = Builder.getInt32(Value); \
1346#include "llvm/Frontend/OpenMP/OMPKinds.def"
1363 Builder.SetInsertPoint(UI->getParent());
1364 UI->eraseFromParent();
1377 auto *KernelArgsPtr =
1378 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1383 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1386 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1390 NumThreads, HostPtr, KernelArgsPtr};
1417 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1421 Value *Return =
nullptr;
1441 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1442 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1449 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1451 auto CurFn =
Builder.GetInsertBlock()->getParent();
1458 emitBlock(OffloadContBlock, CurFn,
true);
1463 Value *CancelFlag, omp::Directive CanceledDirective) {
1465 "Unexpected cancellation!");
1485 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1494 Builder.SetInsertPoint(CancellationBlock);
1495 Builder.CreateBr(*FiniBBOrErr);
1498 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1510 size_t NumArgs = OutlinedFn.
arg_size();
1511 assert((NumArgs == 2 || NumArgs == 3) &&
1512 "expected a 2-3 argument parallel outlined function");
1513 bool UseArgStruct = NumArgs == 3;
1518 {Builder.getInt16Ty(), Builder.getInt32Ty()},
1522 OutlinedFn.
getName() +
".wrapper", OMPIRBuilder->
M);
1524 WrapperFn->addParamAttr(0, Attribute::NoUndef);
1525 WrapperFn->addParamAttr(0, Attribute::ZExt);
1526 WrapperFn->addParamAttr(1, Attribute::NoUndef);
1530 Builder.SetInsertPoint(EntryBB);
1533 Value *AddrAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1535 AddrAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1536 AddrAlloca, Builder.getPtrTy(0),
1537 AddrAlloca->
getName() +
".ascast");
1539 Value *ZeroAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1541 ZeroAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1542 ZeroAlloca, Builder.getPtrTy(0),
1543 ZeroAlloca->
getName() +
".ascast");
1545 Value *ArgsAlloca =
nullptr;
1547 ArgsAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
1548 nullptr,
"global_args");
1549 ArgsAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1550 ArgsAlloca, Builder.getPtrTy(0),
1551 ArgsAlloca->
getName() +
".ascast");
1555 Builder.CreateStore(WrapperFn->getArg(1), AddrAlloca);
1556 Builder.CreateStore(Builder.getInt32(0), ZeroAlloca);
1560 llvm::omp::RuntimeFunction::OMPRTL___kmpc_get_shared_variables),
1568 Value *StructArg = Builder.CreateLoad(Builder.getPtrTy(), ArgsAlloca);
1569 StructArg = Builder.CreateInBoundsGEP(Builder.getPtrTy(), StructArg,
1570 {Builder.getInt64(0)});
1571 StructArg = Builder.CreateLoad(Builder.getPtrTy(), StructArg,
"structArg");
1572 Args.push_back(StructArg);
1576 Builder.CreateCall(&OutlinedFn, Args);
1577 Builder.CreateRetVoid();
1592 "Expected at least tid and bounded tid as arguments");
1593 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1601 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1604 assert(CI &&
"Expected call instruction to outlined function");
1605 CI->
getParent()->setName(
"omp_parallel");
1607 Builder.SetInsertPoint(CI);
1608 Type *PtrTy = OMPIRBuilder->VoidPtr;
1611 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1615 Value *Args = ArgsAlloca;
1619 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1620 Builder.restoreIP(CurrentIP);
1623 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1625 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1627 Builder.CreateStore(V, StoreAddress);
1631 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1632 : Builder.getInt32(1);
1633 Value *NumThreadsArg =
1634 NumThreads ? Builder.CreateZExtOrTrunc(NumThreads, OMPIRBuilder->Int32)
1635 : Builder.getInt32(-1);
1645 Value *Parallel60CallArgs[] = {
1650 Builder.getInt32(-1),
1654 Builder.getInt64(NumCapturedVars),
1655 Builder.getInt32(0)};
1663 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1666 Builder.SetInsertPoint(PrivTID);
1668 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1675 I->eraseFromParent();
1698 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1706 F->addMetadata(LLVMContext::MD_callback,
1715 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1718 "Expected at least tid and bounded tid as arguments");
1719 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1722 CI->
getParent()->setName(
"omp_parallel");
1723 Builder.SetInsertPoint(CI);
1726 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1730 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1732 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1739 auto PtrTy = OMPIRBuilder->VoidPtr;
1740 if (IfCondition && NumCapturedVars == 0) {
1748 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1751 Builder.SetInsertPoint(PrivTID);
1753 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1760 I->eraseFromParent();
1768 Value *NumThreads, omp::ProcBindKind ProcBind,
bool IsCancellable) {
1777 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1778 (ProcBind != OMP_PROC_BIND_default);
1785 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1789 if (NumThreads && !
Config.isTargetDevice()) {
1792 Builder.CreateIntCast(NumThreads, Int32,
false)};
1797 if (ProcBind != OMP_PROC_BIND_default) {
1801 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1823 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1826 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1829 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1833 PointerType ::get(
M.getContext(), 0),
1834 "zero.addr.ascast");
1858 if (IP.getBlock()->end() == IP.getPoint()) {
1864 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1865 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1866 "Unexpected insertion point for finalization call!");
1878 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1884 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1902 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1905 assert(BodyGenCB &&
"Expected body generation callback!");
1907 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP, PRegExitBB))
1910 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1914 bool UsesDeviceSharedMemory =
1916 std::unique_ptr<OutlineInfo> OI =
1917 UsesDeviceSharedMemory
1918 ? std::make_unique<DeviceSharedMemOutlineInfo>(*
this)
1919 : std::make_unique<OutlineInfo>();
1921 if (
Config.isTargetDevice()) {
1923 OI->PostOutlineCB = [=, ToBeDeletedVec =
1924 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1926 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1927 ThreadID, ToBeDeletedVec);
1931 OI->PostOutlineCB = [=, ToBeDeletedVec =
1932 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1934 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1938 OI->FixUpNonEntryAllocas =
true;
1939 OI->OuterAllocBB = OuterAllocaBlock;
1940 OI->EntryBB = PRegEntryBB;
1941 OI->ExitBB = PRegExitBB;
1942 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
1943 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
1947 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
1959 ".omp_par", ArgsInZeroAddressSpace);
1964 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1966 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1971 return GV->getValueType() == OpenMPIRBuilder::Ident;
1976 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1982 if (&V == TIDAddr || &V == ZeroAddr) {
1983 OI->ExcludeArgsFromAggregate.push_back(&V);
1988 for (
Use &U : V.uses())
1990 if (ParallelRegionBlockSet.
count(UserI->getParent()))
2000 if (!V.getType()->isPointerTy()) {
2004 Builder.restoreIP(OuterAllocIP);
2006 if (UsesDeviceSharedMemory) {
2009 V.getName() +
".reloaded");
2010 for (
BasicBlock *DeallocBlock : OuterDeallocBlocks)
2012 InsertPointTy(DeallocBlock, DeallocBlock->getFirstInsertionPt()),
2015 Ptr =
Builder.CreateAlloca(V.getType(),
nullptr,
2016 V.getName() +
".reloaded");
2021 Builder.SetInsertPoint(InsertBB,
2026 Builder.restoreIP(InnerAllocaIP);
2027 Inner =
Builder.CreateLoad(V.getType(), Ptr);
2030 Value *ReplacementValue =
nullptr;
2033 ReplacementValue = PrivTID;
2036 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
2044 assert(ReplacementValue &&
2045 "Expected copy/create callback to set replacement value!");
2046 if (ReplacementValue == &V)
2051 UPtr->set(ReplacementValue);
2076 for (
Value *Output : Outputs)
2080 "OpenMP outlining should not produce live-out values!");
2082 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
2084 for (
auto *BB : Blocks)
2085 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
2093 assert(FiniInfo.DK == OMPD_parallel &&
2094 "Unexpected finalization stack state!");
2105 Builder.CreateBr(*FiniBBOrErr);
2109 Term->eraseFromParent();
2115 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
2116 UI->eraseFromParent();
2179 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2181 Builder.CreateStore(DepValPtr, Addr);
2184 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2186 ConstantInt::get(SizeTy,
2191 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Flags));
2193 static_cast<unsigned int>(Dep.
DepKind)),
2206 if (Dependencies.
empty())
2226 Type *DependInfo = OMPBuilder.DependInfo;
2228 Value *DepArray =
nullptr;
2230 Builder.SetInsertPoint(
2234 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2236 Builder.restoreIP(OldIP);
2238 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2240 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2247Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2249 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2264 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2268 "omp_taskloop_dup",
M);
2271 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2272 DestTaskArg->
setName(
"dest_task");
2273 SrcTaskArg->
setName(
"src_task");
2274 LastprivateFlagArg->
setName(
"lastprivate_flag");
2276 IRBuilderBase::InsertPointGuard Guard(
Builder);
2280 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2281 Type *TaskWithPrivatesTy =
2284 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2286 PrivatesTy, TaskPrivates,
2291 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2292 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2294 DestTaskContextPtr->
setName(
"destPtr");
2295 SrcTaskContextPtr->
setName(
"srcPtr");
2300 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2301 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2302 if (!AfterIPOrError)
2304 Builder.restoreIP(*AfterIPOrError);
2314 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2316 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2318 Value *TaskContextStructPtrVal) {
2323 uint32_t SrcLocStrSize;
2339 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP, TaskloopExitBB))
2342 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2347 llvm::CanonicalLoopInfo *CLI = result.
get();
2348 auto OI = std::make_unique<OutlineInfo>();
2349 OI->EntryBB = TaskloopAllocaBB;
2350 OI->OuterAllocBB = AllocaIP.getBlock();
2351 OI->ExitBB = TaskloopExitBB;
2352 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2353 copy(DeallocBlocks, OI->OuterDeallocBBs.end());
2359 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2361 TaskloopAllocaIP,
"lb",
false,
true);
2363 TaskloopAllocaIP,
"ub",
false,
true);
2365 TaskloopAllocaIP,
"step",
false,
true);
2368 OI->Inputs.insert(FakeLB);
2369 OI->Inputs.insert(FakeUB);
2370 OI->Inputs.insert(FakeStep);
2371 if (TaskContextStructPtrVal)
2372 OI->Inputs.insert(TaskContextStructPtrVal);
2373 assert(((TaskContextStructPtrVal && DupCB) ||
2374 (!TaskContextStructPtrVal && !DupCB)) &&
2375 "Task context struct ptr and duplication callback must be both set "
2381 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2385 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2386 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2389 if (!TaskDupFnOrErr) {
2392 Value *TaskDupFn = *TaskDupFnOrErr;
2394 OI->PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2395 TaskloopAllocaBB, CLI, TaskDupFn, ToBeDeleted, IfCond,
2396 GrainSize, NoGroup, Sched, FakeLB, FakeUB, FakeStep,
2397 FakeSharedsTy, Final, Mergeable, Priority,
2398 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2400 assert(OutlinedFn.hasOneUse() &&
2401 "there must be a single user for the outlined function");
2408 Value *CastedLBVal =
2409 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2410 Value *CastedUBVal =
2411 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2412 Value *CastedStepVal =
2413 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2415 Builder.SetInsertPoint(StaleCI);
2428 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2449 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2451 AllocaInst *ArgStructAlloca =
2453 assert(ArgStructAlloca &&
2454 "Unable to find the alloca instruction corresponding to arguments "
2455 "for extracted function");
2456 std::optional<TypeSize> ArgAllocSize =
2459 "Unable to determine size of arguments for extracted function");
2460 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2465 CallInst *TaskData =
Builder.CreateCall(
2466 TaskAllocFn, {Ident, ThreadID,
Flags,
2467 TaskSize, SharedsSize,
2472 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2473 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2478 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2481 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2484 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2490 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2496 Value *GrainSizeVal =
2497 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2499 Value *TaskDup = TaskDupFn;
2501 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2502 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2507 Builder.CreateCall(TaskloopFn, Args);
2514 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2519 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2521 LoadInst *SharedsOutlined =
2522 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2523 OutlinedFn.getArg(1)->replaceUsesWithIf(
2525 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2528 Type *IVTy =
IV->getType();
2534 Value *TaskLB =
nullptr;
2535 Value *TaskUB =
nullptr;
2536 Value *TaskStep =
nullptr;
2537 Value *LoadTaskLB =
nullptr;
2538 Value *LoadTaskUB =
nullptr;
2539 Value *LoadTaskStep =
nullptr;
2540 for (Instruction &
I : *TaskloopAllocaBB) {
2541 if (
I.getOpcode() == Instruction::GetElementPtr) {
2544 switch (CI->getZExtValue()) {
2556 }
else if (
I.getOpcode() == Instruction::Load) {
2558 if (
Load.getPointerOperand() == TaskLB) {
2559 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2561 }
else if (
Load.getPointerOperand() == TaskUB) {
2562 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2564 }
else if (
Load.getPointerOperand() == TaskStep) {
2565 assert(TaskStep !=
nullptr &&
"Expected value for TaskStep");
2571 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2573 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2574 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2575 assert(LoadTaskStep !=
nullptr &&
"Expected value for LoadTaskStep");
2577 Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
2578 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2579 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2580 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2582 CLI->setTripCount(CastedTripCount);
2584 Builder.SetInsertPoint(CLI->getBody(),
2585 CLI->getBody()->getFirstInsertionPt());
2587 if (NumOfCollapseLoops > 1) {
2593 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2596 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2597 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2598 User *IVUser = IVUse->getUser();
2600 if (
Op->getOpcode() == Instruction::URem ||
2601 Op->getOpcode() == Instruction::UDiv) {
2606 for (User *User : UsersToReplace) {
2607 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2624 assert(CLI->getIndVar()->getNumUses() == 3 &&
2625 "Canonical loop should have exactly three uses of the ind var");
2626 for (User *IVUser : CLI->getIndVar()->users()) {
2628 if (
Mul->getOpcode() == Instruction::Mul) {
2629 for (User *MulUser :
Mul->users()) {
2631 if (
Add->getOpcode() == Instruction::Add) {
2632 Add->setOperand(1, CastedTaskLB);
2641 FakeLB->replaceAllUsesWith(CastedLBVal);
2642 FakeUB->replaceAllUsesWith(CastedUBVal);
2643 FakeStep->replaceAllUsesWith(CastedStepVal);
2645 I->eraseFromParent();
2650 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2656 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2666 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2698 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP, TaskExitBB))
2701 auto OI = std::make_unique<OutlineInfo>();
2702 OI->EntryBB = TaskAllocaBB;
2703 OI->OuterAllocBB = AllocaIP.
getBlock();
2704 OI->ExitBB = TaskExitBB;
2705 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2706 copy(DeallocBlocks, OI->OuterDeallocBBs.
end());
2711 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2713 OI->PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2714 Affinities, Mergeable, Priority, EventHandle,
2716 ToBeDeleted](
Function &OutlinedFn)
mutable {
2718 assert(OutlinedFn.hasOneUse() &&
2719 "there must be a single user for the outlined function");
2724 bool HasShareds = StaleCI->
arg_size() > 1;
2725 Builder.SetInsertPoint(StaleCI);
2750 bool UseMergedIf0Path = ConstIfCondition && ConstIfCondition->isZero();
2754 Flags =
Builder.CreateOr(FinalFlag, Flags);
2757 if (Mergeable || UseMergedIf0Path)
2769 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2778 assert(ArgStructAlloca &&
2779 "Unable to find the alloca instruction corresponding to arguments "
2780 "for extracted function");
2781 std::optional<TypeSize> ArgAllocSize =
2784 "Unable to determine size of arguments for extracted function");
2785 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2791 TaskAllocFn, {Ident, ThreadID, Flags,
2792 TaskSize, SharedsSize,
2795 if (Affinities.
Count && Affinities.
Info) {
2797 OMPRTL___kmpc_omp_reg_task_with_affinity);
2808 OMPRTL___kmpc_task_allow_completion_event);
2812 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2814 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2815 Builder.CreateStore(EventVal, EventHandleAddr);
2821 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2822 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2836 Constant *Zero = ConstantInt::get(Int32Ty, 0);
2840 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2843 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2845 TaskStructType, TaskGEP, {Zero, ConstantInt::get(Int32Ty, 4)});
2848 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2849 PriorityData, {Zero, Zero});
2850 Builder.CreateStore(Priority, CmplrData);
2853 Value *DepArray =
nullptr;
2854 Value *NumDeps =
nullptr;
2857 NumDeps = Dependencies.
NumDeps;
2858 }
else if (!Dependencies.
Deps.empty()) {
2860 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
2880 if (IfCondition && !UseMergedIf0Path) {
2885 Builder.GetInsertPoint()->getParent()->getTerminator();
2886 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2887 Builder.SetInsertPoint(IfTerminator);
2890 Builder.SetInsertPoint(ElseTI);
2897 {Ident, ThreadID, NumDeps, DepArray,
2898 ConstantInt::get(
Builder.getInt32Ty(), 0),
2913 Builder.SetInsertPoint(ThenTI);
2921 {Ident, ThreadID, TaskData, NumDeps, DepArray,
2922 ConstantInt::get(
Builder.getInt32Ty(), 0),
2933 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2935 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2936 OutlinedFn.getArg(1)->replaceUsesWithIf(
2937 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2941 I->eraseFromParent();
2945 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2967 if (
Error Err = BodyGenCB(AllocaIP,
Builder.saveIP(), DeallocBlocks))
2970 Builder.SetInsertPoint(TaskgroupExitBB);
3013 unsigned CaseNumber = 0;
3014 for (
auto SectionCB : SectionCBs) {
3016 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
3018 Builder.SetInsertPoint(CaseBB);
3022 {CaseEndBr->getParent(), CaseEndBr->getIterator()}, {}))
3033 Value *LB = ConstantInt::get(I32Ty, 0);
3034 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
3035 Value *ST = ConstantInt::get(I32Ty, 1);
3037 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
3042 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
3043 WorksharingLoopType::ForStaticLoop, !IsNowait);
3049 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
3053 assert(FiniInfo.DK == OMPD_sections &&
3054 "Unexpected finalization stack state!");
3055 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
3069 if (IP.getBlock()->end() != IP.getPoint())
3080 auto *CaseBB =
Loc.IP.getBlock();
3081 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
3082 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
3088 Directive OMPD = Directive::OMPD_sections;
3091 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
// Produces the GPU thread id as an IR Value. The only visible part of the
// body names the runtime entry OMPRTL___kmpc_get_hardware_thread_id_in_block.
// NOTE(review): the lines that look up and call that entry are not visible in
// this excerpt -- presumably a runtime call is emitted; confirm.
3102Value *OpenMPIRBuilder::getGPUThreadID() {
3105 OMPRTL___kmpc_get_hardware_thread_id_in_block),
// NOTE(review): only the signature is visible here. Elsewhere in this file the
// result is passed to Builder.CreateIntCast(..., Builder.getInt16Ty(), true),
// so it is presumably an integer Value holding the warp size -- confirm.
3109Value *OpenMPIRBuilder::getGPUWarpSize() {
// Emits the warp id of the current thread: the GPU thread id shifted right
// (arithmetic shift) by log2 of the configured warp size. The resulting
// instruction is named "nvptx_warp_id".
3114Value *OpenMPIRBuilder::getNVPTXWarpID() {
// Number of low thread-id bits that select the lane inside a warp.
3115 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3116 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
// Emits the lane id of the current thread: the GPU thread id AND-ed with a
// mask that keeps the low log2(warp size) bits.
3119Value *OpenMPIRBuilder::getNVPTXLaneID() {
3120 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3121 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
// Build a mask with the low LaneIDBits bits set.
// NOTE(review): if LaneIDBits were 0 (warp size 1) the shift amount below is
// 32, which is undefined for a 32-bit unsigned operand; the assert above only
// rules out LaneIDBits >= 32. Confirm a warp size of 1 cannot occur.
3122 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
3123 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
3130 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
3131 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
3132 assert(FromSize > 0 &&
"From size must be greater than zero");
3133 assert(ToSize > 0 &&
"To size must be greater than zero");
3134 if (FromType == ToType)
3136 if (FromSize == ToSize)
3137 return Builder.CreateBitCast(From, ToType);
3139 return Builder.CreateIntCast(From, ToType,
true);
3145 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3146 CastItem,
Builder.getPtrTy(0));
3147 Builder.CreateStore(From, ValCastItem);
3148 return Builder.CreateLoad(ToType, CastItem);
3155 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
3156 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
3160 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
3162 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
3164 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
3165 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
3166 Value *WarpSizeCast =
3168 Value *ShuffleCall =
3170 return castValueToType(AllocaIP, ShuffleCall, CastTy);
3177 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
3189 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3190 Value *ElemPtr = DstAddr;
3191 Value *Ptr = SrcAddr;
3192 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
3196 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3199 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
3200 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3204 if ((
Size / IntSize) > 1) {
3205 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3206 SrcAddrGEP,
Builder.getPtrTy());
3223 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
3225 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
3228 Value *Res = createRuntimeShuffleFunction(
3231 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
3233 Builder.CreateAlignedStore(Res, ElemPtr,
3234 M.getDataLayout().getPrefTypeAlign(ElemType));
3236 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3237 Value *LocalElemPtr =
3238 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3244 Value *Res = createRuntimeShuffleFunction(
3245 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3248 Res =
Builder.CreateTrunc(Res, ElemType);
3249 Builder.CreateStore(Res, ElemPtr);
3250 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3252 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
// Emits IR that copies reduction elements between two reduction lists
// (SrcBase and DestBase), visiting ReductionInfos by index.
//
// Steps visible in this excerpt, per element:
//  - load the source element pointer from slot En.index() of the source list;
//  - compute the address of slot En.index() in the destination list;
//  - on a path whose condition is elided, create a temporary
//    ".omp.reduction.element" alloca and set ShuffleInElement and
//    UpdateDestListPtr;
//  - if ShuffleInElement, move the value with shuffleAndStore using
//    CopyOptions.RemoteLaneOffset; by-ref elements with a DataPtrPtrGen
//    callback go through extra descriptor handling;
//  - a switch on RI.EvaluationKind copies the element directly (plain
//    load/store, separate real/imag parts, or a sized copy);
//  - if UpdateDestListPtr, store the (cast) destination element address into
//    the destination list slot.
//
// Errors produced by the GenResult values are propagated via takeError().
// NOTE(review): the parameter list and several branch conditions are elided
// in this excerpt; the description covers only the visible statements.
3258Error OpenMPIRBuilder::emitReductionListCopy(
3263 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3264 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3268 for (
auto En :
enumerate(ReductionInfos)) {
3270 Value *SrcElementAddr =
nullptr;
3271 AllocaInst *DestAlloca =
nullptr;
3272 Value *DestElementAddr =
nullptr;
3273 Value *DestElementPtrAddr =
nullptr;
3275 bool ShuffleInElement =
false;
3278 bool UpdateDestListPtr =
false;
3282 ReductionArrayTy, SrcBase,
3283 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3284 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3288 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3289 ReductionArrayTy, DestBase,
3290 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// An empty IsByRef array is treated as "no element is by-ref".
3291 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
// By-ref elements allocate the descriptor type, others the element type.
3297 Type *DestAllocaType =
3298 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3299 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3300 ".omp.reduction.element");
3302 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3303 DestElementAddr = DestAlloca;
3306 DestElementAddr->
getName() +
".ascast");
3308 ShuffleInElement =
true;
3309 UpdateDestListPtr =
true;
3321 if (ShuffleInElement) {
3322 Type *ShuffleType = RI.ElementType;
3323 Value *ShuffleSrcAddr = SrcElementAddr;
3324 Value *ShuffleDestAddr = DestElementAddr;
3325 AllocaInst *LocalStorage =
nullptr;
3328 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3329 assert(RI.ByRefAllocatedType &&
3330 "Expected by-ref allocated type to be set");
3335 ShuffleType = RI.ByRefElementType;
3337 if (RI.DataPtrPtrGen) {
3340 Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3343 return GenResult.takeError();
3352 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3354 ShuffleDestAddr = LocalStorage;
3359 ShuffleDestAddr = DestElementAddr;
3363 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3364 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3366 if (IsByRefElem && RI.DataPtrPtrGen) {
3368 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3369 DestAlloca,
Builder.getPtrTy(),
".ascast");
3372 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3373 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3376 return GenResult.takeError();
// Direct (non-shuffle) copy, selected by the element's evaluation kind.
// NOTE(review): the case labels are elided in this excerpt.
3379 switch (RI.EvaluationKind) {
3381 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3383 Builder.CreateStore(Elem, DestElementAddr);
3387 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3388 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3390 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3392 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3394 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3396 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3397 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3398 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3399 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3400 Builder.CreateStore(SrcReal, DestRealPtr);
3401 Builder.CreateStore(SrcImg, DestImgPtr);
3406 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3408 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3409 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// Publish the new element storage in the destination list slot.
3421 if (UpdateDestListPtr) {
3422 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3423 DestElementAddr,
Builder.getPtrTy(),
3424 DestElementAddr->
getName() +
".ascast");
3425 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
// Builds the helper function "_omp_reduction_inter_warp_copy_func" in module
// M. Its visible parameter types are (ptr, i32): the reduce list and the
// number of warps.
//
// Behavior visible in this excerpt:
//  - looks up, or creates if absent, the module global
//    "__openmp_nvptx_data_transfer_temporary_storage" (the transfer medium);
//  - spills both arguments to allocas (".addr" / ".ascast");
//  - for each reduction element, walks its allocation size in chunks of
//    4, 2 and 1 bytes (TySize), doing RealTySize / TySize iterations per
//    chunk size and continuing with the remainder RealTySize % TySize;
//  - threads whose lane id is zero ("warp_master") store the element chunk
//    into the transfer medium at index WarpID;
//  - threads with GPUThreadID < NumWarpsVal ("is_active_thread") load the
//    chunk back from the medium at index GPUThreadID and store it to their
//    own element.
// The BarrierIP1/BarrierIP2 error checks suggest a barrier is emitted before
// each of the two phases -- NOTE(review): the calls are elided; confirm.
//
// Returns the created Function, or an error from a barrier or from a
// DataPtrPtrGen callback.
3432Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3436 LLVMContext &Ctx =
M.getContext();
3438 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3442 "_omp_reduction_inter_warp_copy_func", &
M);
3448 Builder.SetInsertPoint(EntryBB);
3465 StringRef TransferMediumName =
3466 "__openmp_nvptx_data_transfer_temporary_storage";
3467 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3468 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
// Create the transfer medium only once per module.
3470 if (!TransferMedium) {
3471 TransferMedium =
new GlobalVariable(
3479 Value *GPUThreadID = getGPUThreadID();
3481 Value *LaneID = getNVPTXLaneID();
3483 Value *WarpID = getNVPTXWarpID();
3487 Builder.GetInsertBlock()->getFirstInsertionPt());
3491 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3492 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3493 AllocaInst *NumWarpsAlloca =
3494 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3495 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3496 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3497 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3498 NumWarpsAlloca,
Builder.getPtrTy(0),
3499 NumWarpsAlloca->
getName() +
".ascast");
3500 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3501 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3510 for (
auto En :
enumerate(ReductionInfos)) {
3516 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
// By-ref elements are sized by their pointee type, others by ElementType.
3517 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3518 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
// Try 4-, 2- and 1-byte chunks until no bytes remain.
3519 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3522 unsigned NumIters = RealTySize / TySize;
3525 Value *Cnt =
nullptr;
3526 Value *CntAddr =
nullptr;
3533 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3535 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3536 CntAddr->
getName() +
".ascast");
3548 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3549 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3556 omp::Directive::OMPD_unknown,
3560 return BarrierIP1.takeError();
// Only lane 0 of each warp writes into the transfer medium.
3566 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3567 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3571 auto *RedListArrayTy =
3574 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3576 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3577 {ConstantInt::get(IndexTy, 0),
3578 ConstantInt::get(IndexTy, En.index())});
3582 if (IsByRefElem && RI.DataPtrPtrGen) {
3584 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3587 return GenRes.takeError();
3598 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3603 Builder.CreateStore(Elem, MediumPtr,
3615 omp::Directive::OMPD_unknown,
3619 return BarrierIP2.takeError();
3626 Value *NumWarpsVal =
// Threads with an id below the warp count read the staged values back.
3629 Value *IsActiveThread =
3630 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3631 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3638 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3640 Value *TargetElemPtrPtr =
3641 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3642 {ConstantInt::get(IndexTy, 0),
3643 ConstantInt::get(IndexTy, En.index())});
3644 Value *TargetElemPtrVal =
3646 Value *TargetElemPtr = TargetElemPtrVal;
3648 if (IsByRefElem && RI.DataPtrPtrGen) {
3650 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3653 return GenRes.takeError();
3655 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
// The third CreateLoad argument (true) is the isVolatile flag.
3663 Value *SrcMediumValue =
3664 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3665 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3675 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3676 Builder.CreateStore(Cnt, CntAddr,
false);
3678 auto *CurFn =
Builder.GetInsertBlock()->getParent();
// Bytes not covered by this chunk size are handled by the next, smaller one.
3682 RealTySize %= TySize;
// Builds the helper function "_omp_reduction_shuffle_and_reduce_func" in
// module M. Its visible parameter types are (ptr, i16, i16, i16): reduce
// list, lane id, remote lane offset and algorithm version.
//
// Behavior visible in this excerpt:
//  - spills all four arguments to allocas and reloads them;
//  - allocates a local ".omp.reduction.remote_reduce_list" array and fills it
//    through emitReductionListCopy using RemoteLaneOffset;
//  - branches on CondReduce = CondAlgo0 | CondAlgo1 | CondAlgo2; the call
//    emitted on the taken path is marked NoUnwind (the call itself is elided
//    -- presumably the reduction function; confirm);
//  - branches on CondCopy = Algo1 & (LaneId >= RemoteLaneOffset) and, on the
//    taken path, copies the remote list back over ReduceList with a second
//    emitReductionListCopy.
//
// Returns the created Function, or the Error from emitReductionListCopy.
3692Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3695 LLVMContext &Ctx =
M.getContext();
3696 FunctionType *FuncTy =
3698 {Builder.getPtrTy(), Builder.getInt16Ty(),
3699 Builder.getInt16Ty(), Builder.getInt16Ty()},
3703 "_omp_reduction_shuffle_and_reduce_func", &
M);
3714 Builder.SetInsertPoint(EntryBB);
3725 Type *ReduceListArgType = ReduceListArg->
getType();
3729 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3730 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3731 LaneIDArg->
getName() +
".addr");
3733 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3734 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3735 AlgoVerArg->
getName() +
".addr");
3742 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3744 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3745 ReduceListAlloca, ReduceListArgType,
3746 ReduceListAlloca->
getName() +
".ascast");
3747 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3748 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3749 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3750 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3751 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3752 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3753 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3754 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3755 RemoteReductionListAlloca,
Builder.getPtrTy(),
3756 RemoteReductionListAlloca->
getName() +
".ascast");
// Spill the incoming arguments, then reload them as ordinary values.
3758 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3759 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3760 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3761 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3763 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3764 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3765 Value *RemoteLaneOffset =
3766 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3767 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
// First copy: ReduceList into the local remote list, using RemoteLaneOffset.
3774 Error EmitRedLsCpRes = emitReductionListCopy(
3776 ReduceList, RemoteListAddrCast, IsByRef,
3777 {RemoteLaneOffset,
nullptr,
nullptr});
3780 return EmitRedLsCpRes;
3805 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3810 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3811 Value *RemoteOffsetComp =
3813 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
// Reduce when any of the three per-algorithm conditions holds.
3814 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3815 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3821 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3823 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3824 ReduceList,
Builder.getPtrTy());
3825 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3826 RemoteListAddrCast,
Builder.getPtrTy());
3828 ->addFnAttr(Attribute::NoUnwind);
// Copy-back condition: Algo1 and lane id >= remote lane offset (unsigned).
3839 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3840 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3845 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3849 EmitRedLsCpRes = emitReductionListCopy(
3851 RemoteListAddrCast, ReduceList, IsByRef);
3854 return EmitRedLsCpRes;
// Initializes a reduction descriptor at DescriptorAddr from SrcDescriptorAddr.
// Visible steps: compute the store size of DescriptorType; pass both
// descriptor addresses, each with the preferred alignment of DescriptorType,
// to a builder call whose name is elided (presumably a memcpy -- confirm);
// call DataPtrPtrGen to obtain the descriptor's data-pointer field; use
// DataPtr cast to a generic pointer (".ascast") in a call that is elided.
// An error from DataPtrPtrGen is propagated via takeError().
// NOTE(review): the return type and most of the parameter list are elided in
// this excerpt.
3869OpenMPIRBuilder::generateReductionDescriptor(
3871 Type *DescriptorType,
3877 Value *DescriptorSize =
3878 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3880 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3881 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
// Receives the address of the data-pointer field from DataPtrPtrGen.
3885 Value *DataPtrField;
3887 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3890 return GenResult.takeError();
3893 DataPtr,
Builder.getPtrTy(),
".ascast"),
// Allocates a fresh descriptor of type RI.ByRefAllocatedType (named Name),
// casts it to DescriptorPtrTy, and fills it in via
// generateReductionDescriptor using DataPtr and SrcDescriptorAddr.
//
// Returns the cast descriptor address, or the error produced while
// generating the descriptor.
3899Expected<Value *> OpenMPIRBuilder::createReductionDescriptorCopy(
3901 Value *SrcDescriptorAddr,
Type *DescriptorPtrTy,
const Twine &Name) {
3905 AllocaInst *DescriptorAlloca =
3906 Builder.CreateAlloca(RI.ByRefAllocatedType,
nullptr, Name);
3908 M.getDataLayout().getPrefTypeAlign(RI.ByRefAllocatedType));
3909 Value *DescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3910 DescriptorAlloca, DescriptorPtrTy,
3911 DescriptorAlloca->
getName() +
".ascast");
3916 generateReductionDescriptor(DescriptorAddr, DataPtr, SrcDescriptorAddr,
3917 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3919 return GenResult.takeError();
3921 return DescriptorAddr;
// Builds the helper function "_omp_reduction_list_to_global_copy_func" in
// module M. Its visible parameter types are (ptr, i32, ptr): buffer, index
// and reduce list.
//
// Behavior visible in this excerpt: spill the three arguments to allocas,
// then for each reduction element copy the value reached through the local
// reduce list slot (ElemPtr) into field En.index() of the indexed buffer
// entry (GlobVal). The copy is chosen by RI.EvaluationKind: a plain
// load/store (by-ref elements first resolve their data pointer through
// RI.DataPtrPtrGen and load RI.ByRefElementType), separate real/imag parts,
// or a sized copy of the element's store size.
//
// Returns the created Function, or an error from a DataPtrPtrGen callback.
// NOTE(review): the switch case labels are elided in this excerpt.
3924Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3928 LLVMContext &Ctx =
M.getContext();
3931 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3935 "_omp_reduction_list_to_global_copy_func", &
M);
3942 Builder.SetInsertPoint(EntryBlock);
3952 BufferArg->
getName() +
".addr");
3956 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3957 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3958 BufferArgAlloca,
Builder.getPtrTy(),
3959 BufferArgAlloca->
getName() +
".ascast");
3960 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3961 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3962 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3963 ReduceListArgAlloca,
Builder.getPtrTy(),
3964 ReduceListArgAlloca->
getName() +
".ascast");
3966 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3967 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3968 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3970 Value *LocalReduceList =
3972 Value *BufferArgVal =
3976 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3977 for (
auto En :
enumerate(ReductionInfos)) {
3979 auto *RedListArrayTy =
3983 RedListArrayTy, LocalReduceList,
3984 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// Address of the buffer entry selected by Idxs, then of field En.index().
3990 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3992 ReductionsBufferTy, BufferVD, 0, En.index());
3994 switch (RI.EvaluationKind) {
3996 Value *TargetElement;
// Here an empty IsByRef array means the element is handled by value.
3998 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3999 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
4001 if (RI.DataPtrPtrGen) {
4003 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
4006 return GenResult.takeError();
4010 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
4013 Builder.CreateStore(TargetElement, GlobVal);
4017 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4018 RI.ElementType, ElemPtr, 0, 0,
".realp");
4020 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
4022 RI.ElementType, ElemPtr, 0, 1,
".imagp");
4024 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
4026 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4027 RI.ElementType, GlobVal, 0, 0,
".realp");
4028 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4029 RI.ElementType, GlobVal, 0, 1,
".imagp");
4030 Builder.CreateStore(SrcReal, DestRealPtr);
4031 Builder.CreateStore(SrcImg, DestImgPtr);
4036 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
4038 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
4039 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
// Builds the helper function "_omp_reduction_list_to_global_reduce_func" in
// module M. Its visible parameter types are (ptr, i32, ptr): buffer, index
// and reduce list.
//
// Behavior visible in this excerpt: spill the arguments to allocas, allocate
// a local ".omp.reduction.red_list" array, and fill slot En.index() with
// either the address of field En.index() of the indexed buffer entry
// (GlobValPtr) or, for by-ref elements that have a DataPtrPtrGen callback, a
// descriptor copy from createReductionDescriptorCopy. The call emitted
// afterwards is marked NoUnwind (call elided -- presumably the reduction
// function; confirm).
//
// Returns the created Function; error paths are elided in this excerpt.
4050Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
4054 LLVMContext &Ctx =
M.getContext();
4057 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4061 "_omp_reduction_list_to_global_reduce_func", &
M);
4068 Builder.SetInsertPoint(EntryBlock);
4078 BufferArg->
getName() +
".addr");
4082 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4083 auto *RedListArrayTy =
4088 Value *LocalReduceList =
4089 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4093 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4094 BufferArgAlloca,
Builder.getPtrTy(),
4095 BufferArgAlloca->
getName() +
".ascast");
4096 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4097 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4098 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4099 ReduceListArgAlloca,
Builder.getPtrTy(),
4100 ReduceListArgAlloca->
getName() +
".ascast");
4101 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4102 LocalReduceList,
Builder.getPtrTy(),
4103 LocalReduceList->
getName() +
".ascast");
4105 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4106 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4107 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4112 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4113 for (
auto En :
enumerate(ReductionInfos)) {
// Slot En.index() of the local list that is being populated.
4116 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4117 RedListArrayTy, LocalReduceListAddrCast,
4118 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4120 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4122 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4123 ReductionsBufferTy, BufferVD, 0, En.index());
// By-ref elements with a data-pointer generator get a descriptor copy.
4125 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4129 Value *SrcElementPtrPtr =
4130 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
4131 {ConstantInt::get(IndexTy, 0),
4132 ConstantInt::get(IndexTy, En.index())});
4133 Value *SrcDescriptorAddr =
4137 Expected<Value *> ByRefAlloc = createReductionDescriptorCopy(
4138 AllocaIP, RI, GlobValPtr, SrcDescriptorAddr,
Builder.getPtrTy());
4142 Builder.CreateStore(*ByRefAlloc, TargetElementPtrPtr);
4144 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4152 ->addFnAttr(Attribute::NoUnwind);
// Builds the helper function "_omp_reduction_global_to_list_copy_func" in
// module M. Its visible parameter types are (ptr, i32, ptr): buffer, index
// and reduce list.
//
// Behavior visible in this excerpt: spill the arguments to allocas, then for
// each reduction element copy from field En.index() of the indexed buffer
// entry (GlobValPtr) into the element reached through the local reduce list
// slot (ElemPtr). Visible copy forms are a plain load/store of ElemType,
// separate real/imag stores, and a copy with the preferred alignment of
// RI.ElementType.
//
// Returns the created Function, or an error from a GenResult value on the
// by-ref path.
// NOTE(review): the case labels and the by-ref pointer resolution are elided
// in this excerpt.
4158Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
4162 LLVMContext &Ctx =
M.getContext();
4165 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4169 "_omp_reduction_global_to_list_copy_func", &
M);
4176 Builder.SetInsertPoint(EntryBlock);
4186 BufferArg->
getName() +
".addr");
4190 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4191 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4192 BufferArgAlloca,
Builder.getPtrTy(),
4193 BufferArgAlloca->
getName() +
".ascast");
4194 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4195 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4196 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4197 ReduceListArgAlloca,
Builder.getPtrTy(),
4198 ReduceListArgAlloca->
getName() +
".ascast");
4199 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4200 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4201 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4203 Value *LocalReduceList =
4208 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4209 for (
auto En :
enumerate(ReductionInfos)) {
4210 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
4211 auto *RedListArrayTy =
4215 RedListArrayTy, LocalReduceList,
4216 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4221 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
// Source of the copy: field En.index() of the selected buffer entry.
4222 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4223 ReductionsBufferTy, BufferVD, 0, En.index());
4229 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4236 return GenResult.takeError();
4242 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
4243 Builder.CreateStore(TargetElement, ElemPtr);
4247 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4256 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4258 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4260 Builder.CreateStore(SrcReal, DestRealPtr);
4261 Builder.CreateStore(SrcImg, DestImgPtr);
4268 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4269 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
// Builds the helper function "_omp_reduction_global_to_list_reduce_func" in
// module M. Its visible parameter types are (ptr, i32, ptr): buffer, index
// and reduce list.
//
// Behavior visible in this excerpt: spill the arguments to allocas, allocate
// a local ".omp.reduction.red_list" array, and fill slot En.index() with
// either the address of field En.index() of the indexed buffer entry
// (GlobValPtr) or, for by-ref elements that have a DataPtrPtrGen callback, a
// descriptor copy from createReductionDescriptorCopy. The call emitted
// afterwards is marked NoUnwind (call elided -- presumably the reduction
// function; confirm).
//
// Returns the created Function; error paths are elided in this excerpt.
4281Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4285 LLVMContext &Ctx =
M.getContext();
4288 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4292 "_omp_reduction_global_to_list_reduce_func", &
M);
4299 Builder.SetInsertPoint(EntryBlock);
4309 BufferArg->
getName() +
".addr");
4313 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4319 Value *LocalReduceList =
4320 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4324 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4325 BufferArgAlloca,
Builder.getPtrTy(),
4326 BufferArgAlloca->
getName() +
".ascast");
4327 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4328 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4329 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4330 ReduceListArgAlloca,
Builder.getPtrTy(),
4331 ReduceListArgAlloca->
getName() +
".ascast");
4332 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4333 LocalReduceList,
Builder.getPtrTy(),
4334 LocalReduceList->
getName() +
".ascast");
4336 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4337 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4338 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4343 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4344 for (
auto En :
enumerate(ReductionInfos)) {
// Slot En.index() of the local list that is being populated.
4347 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4348 RedListArrayTy, ReductionList,
4349 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4352 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4353 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4354 ReductionsBufferTy, BufferVD, 0, En.index());
// By-ref elements with a data-pointer generator get a descriptor copy.
4356 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4358 Value *ReduceListVal =
4360 Value *SrcElementPtrPtr =
4361 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4362 {ConstantInt::get(IndexTy, 0),
4363 ConstantInt::get(IndexTy, En.index())});
4364 Value *SrcDescriptorAddr =
4368 Expected<Value *> ByRefAlloc = createReductionDescriptorCopy(
4369 AllocaIP, RI, GlobValPtr, SrcDescriptorAddr,
Builder.getPtrTy());
4373 Builder.CreateStore(*ByRefAlloc, TargetElementPtrPtr);
4375 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4383 ->addFnAttr(Attribute::NoUnwind);
// Returns the reduction function name: Name followed by Suffix, as a
// std::string.
// NOTE(review): how Suffix is computed is elided in this excerpt.
4389std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4390 std::string Suffix =
4392 return (Name + Suffix).str();
// Builds the reduction function named getReductionFuncName(ReducerName). Its
// visible parameter types are (ptr, ptr): the LHS and RHS reduction arrays.
//
// Behavior visible in this excerpt:
//  - spill both arguments to allocas and reload them as LHSArrayPtr and
//    RHSArrayPtr;
//  - first loop over ReductionInfos: address slot En.index() of each array,
//    cast the RHS pointer to the type of RI.PrivateVariable and the LHS
//    pointer to the type of RI.Variable, load both operands as
//    RI.ElementType for non-by-ref elements, and store the Reduced value
//    back through LHSPtr for non-by-ref elements;
//  - if the builder has no insert block after the reduction step, return
//    ReductionFunc immediately;
//  - second loop: call RI.ReductionGenClang with LHS/RHS fix-up pointers,
//    followed by filtered replacements keyed on LHSPtrs[Index] and
//    RHSPtrs[Index] whose predicates capture ReductionFunc.
//
// Returns ReductionFunc, or the error carried by AfterIP.
// NOTE(review): the conditions selecting between the two loops are elided.
4395Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4398 AttributeList FuncAttrs) {
4400 {Builder.getPtrTy(), Builder.getPtrTy()},
4402 std::string
Name = getReductionFuncName(ReducerName);
4411 Builder.SetInsertPoint(EntryBB);
4415 Value *LHSArrayPtr =
nullptr;
4416 Value *RHSArrayPtr =
nullptr;
4423 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4425 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4426 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4427 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4428 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4429 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4430 Builder.CreateStore(Arg0, LHSAddrCast);
4431 Builder.CreateStore(Arg1, RHSAddrCast);
4432 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4433 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4437 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4439 for (
auto En :
enumerate(ReductionInfos)) {
4442 RedArrayTy, RHSArrayPtr,
4443 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4445 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4446 RHSI8Ptr, RI.PrivateVariable->getType(),
4447 RHSI8Ptr->
getName() +
".ascast");
4450 RedArrayTy, LHSArrayPtr,
4451 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4453 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4454 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
// NOTE(review): with an empty IsByRef this condition is false, so no loads
// are emitted; emitListToGlobalCopyFunction instead tests
// `IsByRef.empty() || !IsByRef[...]`. Confirm the difference is intended.
4463 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4464 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4465 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4472 return AfterIP.takeError();
// No insert block left: nothing more can be emitted into this function.
4473 if (!
Builder.GetInsertBlock())
4474 return ReductionFunc;
4478 if (!IsByRef.
empty() && !IsByRef[En.index()])
4479 Builder.CreateStore(Reduced, LHSPtr);
4484 for (
auto En :
enumerate(ReductionInfos)) {
4485 unsigned Index = En.index();
// Filled in by ReductionGenClang.
4487 Value *LHSFixupPtr, *RHSFixupPtr;
4488 Builder.restoreIP(RI.ReductionGenClang(
4489 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4494 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4499 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4513 return ReductionFunc;
4521 assert(RI.Variable &&
"expected non-null variable");
4522 assert(RI.PrivateVariable &&
"expected non-null private variable");
4523 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4524 "expected non-null reduction generator callback");
4527 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4528 "expected variables and their private equivalents to have the same "
4531 assert(RI.Variable->getType()->isPointerTy() &&
4532 "expected variables to be pointers");
4539 ArrayRef<bool> IsByRef,
bool IsNoWait,
bool IsTeamsReduction,
bool IsSPMD,
4541 Value *SrcLocInfo) {
4555 if (ReductionInfos.
size() == 0)
4565 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4569 AttributeList FuncAttrs;
4570 AttrBuilder AttrBldr(Ctx);
4572 AttrBldr.addAttribute(Attr);
4573 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4574 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4578 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4580 if (!ReductionResult)
4582 Function *ReductionFunc = *ReductionResult;
4586 if (GridValue.has_value())
4587 Config.setGridValue(GridValue.value());
4602 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4606 Value *ReductionListAlloca =
4607 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4608 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4609 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4612 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4613 for (
auto En :
enumerate(ReductionInfos)) {
4616 RedArrayTy, ReductionList,
4617 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4620 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4625 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4626 Builder.CreateStore(CastElem, ElemPtr);
4630 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4636 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4642 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4651 unsigned MaxDataSize = 0;
4653 for (
auto En :
enumerate(ReductionInfos)) {
4657 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4658 ? En.value().ByRefElementType
4659 : En.value().ElementType;
4660 auto Size =
M.getDataLayout().getTypeStoreSize(RedTypeArg);
4661 if (
Size > MaxDataSize)
4665 Value *ReductionDataSize =
4666 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4670 Function *CopyScratchToListFunc =
nullptr;
4672 Value *ScratchForCopyBack =
nullptr;
4675 Value *RLForCopyBack = RL;
4677 if (!IsTeamsReduction) {
4678 Value *SarFuncCast =
4679 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4681 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4682 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4685 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4690 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4693 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4698 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4703 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4726 Value *RuntimeRL = RL;
4733 ReductionsBufferTy,
nullptr,
".omp.reduction.scratch");
4734 Value *PerThreadScratch =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4735 PerThreadScratchAlloca, PtrTy,
4736 PerThreadScratchAlloca->
getName() +
".ascast");
4739 Value *PerThreadRedListAlloca =
4740 Builder.CreateAlloca(RedArrayTy,
nullptr,
4741 ".omp.reduction.per_thread_red_list");
4742 RuntimeRL =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4743 PerThreadRedListAlloca, PtrTy,
4744 PerThreadRedListAlloca->
getName() +
".ascast");
4749 for (
auto En :
enumerate(ReductionInfos)) {
4751 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4754 ReductionsBufferTy, PerThreadScratch, 0, En.index());
4755 Value *Slot =
Builder.CreateConstInBoundsGEP2_32(RedArrayTy, RuntimeRL,
4758 Value *RuntimeListEntry = FieldPtr;
4760 Value *SrcDescriptor =
4763 AllocaIP, RI, FieldPtr, SrcDescriptor, PtrTy);
4766 RuntimeListEntry = *Descriptor;
4768 Builder.CreateStore(RuntimeListEntry, Slot);
4774 Type *CopyArg0Ty = (*LtGCFunc)->getFunctionType()->getParamType(0);
4775 Type *CopyArg2Ty = (*LtGCFunc)->getFunctionType()->getParamType(2);
4776 ScratchForCopyBack =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4777 PerThreadScratch, CopyArg0Ty);
4779 Builder.CreatePointerBitCastOrAddrSpaceCast(RL, CopyArg2Ty);
4787 *LtGCFunc, {ScratchForCopyBack,
Builder.getInt32(0), RLForCopyBack});
4788 CopyScratchToListFunc = *GtLCFunc;
4791 Value *Args3[] = {SrcLocInfo, RuntimeRL, *SarFunc, WcFunc,
4792 *LtGCFunc, *GtLCFunc, *GtLRFunc};
4795 RuntimeFunction::OMPRTL___kmpc_gpu_xteam_reduce_nowait);
4815 if (ScratchForCopyBack) {
4818 CopyScratchToListFunc,
4819 {ScratchForCopyBack,
Builder.getInt32(0), RLForCopyBack});
4823 for (
auto En :
enumerate(ReductionInfos)) {
4832 Value *LHSPtr, *RHSPtr;
4834 &LHSPtr, &RHSPtr, CurFunc));
4840 RedValue =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4842 if (RHSPtr->
getType() != RHS->getType())
4844 Builder.CreatePointerBitCastOrAddrSpaceCast(RHS, RHSPtr->
getType());
4855 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4857 "red.value." +
Twine(En.index()));
4868 if (!IsByRef.
empty() && !IsByRef[En.index()])
4873 if (ContinuationBlock) {
4874 Builder.CreateBr(ContinuationBlock);
4875 Builder.SetInsertPoint(ContinuationBlock);
4877 Config.setEmitLLVMUsed();
4888 ".omp.reduction.func", &M);
4898 Builder.SetInsertPoint(ReductionFuncBlock);
4899 Value *LHSArrayPtr =
nullptr;
4900 Value *RHSArrayPtr =
nullptr;
4911 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4913 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4914 Value *LHSAddrCast =
4915 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4916 Value *RHSAddrCast =
4917 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4918 Builder.CreateStore(Arg0, LHSAddrCast);
4919 Builder.CreateStore(Arg1, RHSAddrCast);
4920 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4921 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4923 LHSArrayPtr = ReductionFunc->
getArg(0);
4924 RHSArrayPtr = ReductionFunc->
getArg(1);
4927 unsigned NumReductions = ReductionInfos.
size();
4930 for (
auto En :
enumerate(ReductionInfos)) {
4932 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4933 RedArrayTy, LHSArrayPtr, 0, En.index());
4934 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4935 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4938 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4939 RedArrayTy, RHSArrayPtr, 0, En.index());
4940 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4941 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4950 Builder.restoreIP(*AfterIP);
4952 if (!Builder.GetInsertBlock())
4956 if (!IsByRef[En.index()])
4957 Builder.CreateStore(Reduced, LHSPtr);
4959 Builder.CreateRetVoid();
4966 bool IsNoWait,
bool IsTeamsReduction) {
4970 IsByRef, IsNoWait, IsTeamsReduction);
4977 if (ReductionInfos.
size() == 0)
4987 unsigned NumReductions = ReductionInfos.
size();
4990 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4992 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4994 for (
auto En :
enumerate(ReductionInfos)) {
4995 unsigned Index = En.index();
4997 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4998 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
5005 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
5015 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
5020 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
5021 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
5023 Value *Lock = getOMPCriticalRegionLock(
".reduction");
5025 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
5026 : RuntimeFunction::OMPRTL___kmpc_reduce);
5029 {Ident, ThreadId, NumVariables, RedArraySize,
5030 RedArray, ReductionFunc, Lock},
5041 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
5042 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
5043 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
5048 Builder.SetInsertPoint(NonAtomicRedBlock);
5049 for (
auto En :
enumerate(ReductionInfos)) {
5055 if (!IsByRef[En.index()]) {
5057 "red.value." +
Twine(En.index()));
5059 Value *PrivateRedValue =
5061 "red.private.value." +
Twine(En.index()));
5069 if (!
Builder.GetInsertBlock())
5072 if (!IsByRef[En.index()])
5076 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
5077 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
5079 Builder.CreateBr(ContinuationBlock);
5084 Builder.SetInsertPoint(AtomicRedBlock);
5085 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
5092 if (!
Builder.GetInsertBlock())
5095 Builder.CreateBr(ContinuationBlock);
5108 if (!
Builder.GetInsertBlock())
5111 Builder.SetInsertPoint(ContinuationBlock);
5122 Directive OMPD = Directive::OMPD_master;
5127 Value *Args[] = {Ident, ThreadId};
5135 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5146 Directive OMPD = Directive::OMPD_masked;
5152 Value *ArgsEnd[] = {Ident, ThreadId};
5160 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5170 Call->setDoesNotThrow();
5185 bool IsInclusive,
ScanInfo *ScanRedInfo) {
5187 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
5188 ScanVarsType, ScanRedInfo);
5199 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5202 Type *DestTy = ScanVarsType[i];
5203 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5206 Builder.CreateStore(Src, Val);
5211 Builder.GetInsertBlock()->getParent());
5214 IV = ScanRedInfo->
IV;
5217 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5220 Type *DestTy = ScanVarsType[i];
5222 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5224 Builder.CreateStore(Src, ScanVars[i]);
5238 Builder.GetInsertBlock()->getParent());
5243Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
5247 Builder.restoreIP(AllocaIP);
5249 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5251 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
5258 Builder.restoreIP(CodeGenIP);
5260 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
5261 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5265 Value *Buff = Builder.CreateMalloc(
IntPtrTy, ScanVarsType[i], Allocsize,
5266 AllocSpan,
nullptr,
"arr");
5267 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
5285 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5294Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
5300 Value *PrivateVar = RedInfo.PrivateVariable;
5301 Value *OrigVar = RedInfo.Variable;
5305 Type *SrcTy = RedInfo.ElementType;
5310 Builder.CreateStore(Src, OrigVar);
5333 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5358 Builder.GetInsertBlock()->getModule(),
5365 Builder.GetInsertBlock()->getModule(),
5371 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5372 Builder.SetInsertPoint(InputBB);
5375 Builder.SetInsertPoint(LoopBB);
5391 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5393 Builder.SetInsertPoint(InnerLoopBB);
5397 Value *ReductionVal = RedInfo.PrivateVariable;
5400 Type *DestTy = RedInfo.ElementType;
5403 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5406 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5411 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5414 Builder.CreateStore(Result, LHSPtr);
5417 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5419 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5420 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5423 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5429 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5450 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5457Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5469 Error Err = InputLoopGen();
5480 Error Err = ScanLoopGen(Builder.saveIP());
5487void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5524 Builder.SetInsertPoint(Preheader);
5527 Builder.SetInsertPoint(Header);
5528 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5529 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5534 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5535 Builder.CreateCondBr(Cmp, Body, Exit);
5540 Builder.SetInsertPoint(Latch);
5542 "omp_" + Name +
".next",
true);
5553 CL->Header = Header;
5572 NextBB, NextBB, Name);
5604 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5613 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5614 ScanRedInfo->
Span = TripCount;
5620 ScanRedInfo->
IV =
IV;
5621 createScanBBs(ScanRedInfo);
5624 assert(Terminator->getNumSuccessors() == 1);
5625 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5628 Builder.GetInsertBlock()->getParent());
5631 Builder.GetInsertBlock()->getParent());
5632 Builder.CreateBr(ContinueBlock);
5638 const auto &&InputLoopGen = [&]() ->
Error {
5640 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5641 ComputeIP, Name,
true, ScanRedInfo);
5645 Builder.restoreIP((*LoopInfo)->getAfterIP());
5651 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5655 Builder.restoreIP((*LoopInfo)->getAfterIP());
5659 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5667 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5677 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5678 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5682 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5698 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5701 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5705 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5710 Value *CountIfLooping;
5711 if (InclusiveStop) {
5712 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5718 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5721 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5722 "omp_" + Name +
".tripcount");
5727 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5734 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5741 ScanRedInfo->
IV = IndVar;
5742 return BodyGenCB(
Builder.saveIP(), IndVar);
5748 Builder.getCurrentDebugLocation());
5759 unsigned Bitwidth = Ty->getIntegerBitWidth();
5762 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5765 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5775 unsigned Bitwidth = Ty->getIntegerBitWidth();
5778 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5781 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5789 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5791 "Require dedicated allocate IP");
5797 uint32_t SrcLocStrSize;
5801 case WorksharingLoopType::ForStaticLoop:
5802 Flag = OMP_IDENT_FLAG_WORK_LOOP;
5804 case WorksharingLoopType::DistributeStaticLoop:
5805 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5807 case WorksharingLoopType::DistributeForStaticLoop:
5808 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
5815 Type *IVTy =
IV->getType();
5816 FunctionCallee StaticInit =
5817 LoopType == WorksharingLoopType::DistributeForStaticLoop
5820 FunctionCallee StaticFini =
5824 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5827 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5828 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5829 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5830 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5839 Constant *One = ConstantInt::get(IVTy, 1);
5840 Builder.CreateStore(Zero, PLowerBound);
5842 Builder.CreateStore(UpperBound, PUpperBound);
5843 Builder.CreateStore(One, PStride);
5849 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5850 ? OMPScheduleType::OrderedDistribute
5853 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5857 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5858 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5861 PLowerBound, PUpperBound});
5862 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5863 Value *PDistUpperBound =
5864 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5865 Args.push_back(PDistUpperBound);
5870 BuildInitCall(SchedulingType,
Builder);
5871 if (HasDistSchedule &&
5872 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5873 Constant *DistScheduleSchedType = ConstantInt::get(
5878 BuildInitCall(DistScheduleSchedType,
Builder);
5880 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5881 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5882 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5883 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5884 CLI->setTripCount(TripCount);
5890 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5894 return Builder.CreateAdd(OldIV, LowerBound);
5906 omp::Directive::OMPD_for,
false,
5909 return BarrierIP.takeError();
5936 Reachable.insert(
Block);
5946 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5950OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5954 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5955 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5960 Type *IVTy =
IV->getType();
5962 "Max supported tripcount bitwidth is 64 bits");
5964 :
Type::getInt64Ty(Ctx);
5967 Constant *One = ConstantInt::get(InternalIVTy, 1);
5973 for (BasicBlock &BB : *
F)
5974 if (!BB.hasTerminator())
5975 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5980 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5981 for (Instruction *
I : UIs)
5982 I->eraseFromParent();
5985 if (ChunkSize || DistScheduleChunkSize)
5990 FunctionCallee StaticInit =
5992 FunctionCallee StaticFini =
5998 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5999 Value *PLowerBound =
6000 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
6001 Value *PUpperBound =
6002 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
6003 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
6012 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
6013 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
6014 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
6015 "distschedulechunksize");
6016 Value *CastedTripCount =
6017 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
6020 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6022 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
6023 Builder.CreateStore(Zero, PLowerBound);
6024 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
6025 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
6027 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
6028 Builder.CreateStore(UpperBound, PUpperBound);
6029 Builder.CreateStore(One, PStride);
6033 uint32_t SrcLocStrSize;
6036 if (DistScheduleSchedType != OMPScheduleType::None) {
6037 Flag |= OMP_IDENT_FLAG_WORK_DISTRIBUTE;
6042 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
6043 PUpperBound, PStride, One,
6044 this](
Value *SchedulingType,
Value *ChunkSize,
6047 StaticInit, {SrcLoc, ThreadNum,
6048 SchedulingType, PLastIter,
6049 PLowerBound, PUpperBound,
6053 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
6054 if (DistScheduleSchedType != OMPScheduleType::None &&
6055 SchedType != OMPScheduleType::OrderedDistributeChunked &&
6056 SchedType != OMPScheduleType::OrderedDistribute) {
6060 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
6064 Value *FirstChunkStart =
6065 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
6066 Value *FirstChunkStop =
6067 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
6068 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
6070 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
6071 Value *NextChunkStride =
6072 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
6076 Value *DispatchCounter;
6084 DispatchCounter = Counter;
6087 FirstChunkStart, CastedTripCount, NextChunkStride,
6110 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
6111 Value *IsLastChunk =
6112 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
6113 Value *CountUntilOrigTripCount =
6114 Builder.CreateSub(CastedTripCount, DispatchCounter);
6116 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
6117 Value *BackcastedChunkTC =
6118 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
6119 CLI->setTripCount(BackcastedChunkTC);
6124 Value *BackcastedDispatchCounter =
6125 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
6126 CLI->mapIndVar([&](Instruction *) ->
Value * {
6128 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
6141 return AfterIP.takeError();
6156static FunctionCallee
6159 unsigned Bitwidth = Ty->getIntegerBitWidth();
6162 case WorksharingLoopType::ForStaticLoop:
6165 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
6168 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
6170 case WorksharingLoopType::DistributeStaticLoop:
6173 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
6176 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
6178 case WorksharingLoopType::DistributeForStaticLoop:
6181 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
6184 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
6187 if (Bitwidth != 32 && Bitwidth != 64) {
6199 Function &LoopBodyFn,
bool NoLoop) {
6210 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
6211 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6212 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6213 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6218 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
6219 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6223 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
6224 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6225 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
6226 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6227 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
6229 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6253 Builder.restoreIP({Preheader, Preheader->
end()});
6256 Builder.CreateBr(CLI->
getExit());
6264 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
6272 "Expected unique undroppable user of outlined function");
6274 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
6276 "Expected outlined function call to be located in loop preheader");
6278 if (OutlinedFnCallInstruction->
arg_size() > 1)
6285 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
6287 for (
auto &ToBeDeletedItem : ToBeDeleted)
6288 ToBeDeletedItem->eraseFromParent();
6295 uint32_t SrcLocStrSize;
6299 case WorksharingLoopType::ForStaticLoop:
6300 Flag = OMP_IDENT_FLAG_WORK_LOOP;
6302 case WorksharingLoopType::DistributeStaticLoop:
6303 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
6305 case WorksharingLoopType::DistributeForStaticLoop:
6306 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
6311 auto OI = std::make_unique<OutlineInfo>();
6316 SmallVector<Instruction *, 4> ToBeDeleted;
6318 OI->OuterAllocBB = AllocaIP.getBlock();
6341 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
6343 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
6345 CodeExtractorAnalysisCache CEAC(*OuterFn);
6346 CodeExtractor Extractor(Blocks,
6360 SetVector<Value *> SinkingCands, HoistingCands;
6364 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
6371 for (
auto Use :
Users) {
6373 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
6374 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
6380 OI->ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6387 OI->PostOutlineCB = [=, ToBeDeletedVec =
6388 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6398 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6399 bool HasSimdModifier,
bool HasMonotonicModifier,
6400 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6402 Value *DistScheduleChunkSize) {
6403 if (
Config.isTargetDevice())
6404 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6406 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6407 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6409 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6410 OMPScheduleType::ModifierOrdered;
6412 if (HasDistSchedule) {
6413 DistScheduleSchedType = DistScheduleChunkSize
6414 ? OMPScheduleType::OrderedDistributeChunked
6415 : OMPScheduleType::OrderedDistribute;
6417 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6418 case OMPScheduleType::BaseStatic:
6419 case OMPScheduleType::BaseDistribute:
6420 assert((!ChunkSize || !DistScheduleChunkSize) &&
6421 "No chunk size with static-chunked schedule");
6422 if (IsOrdered && !HasDistSchedule)
6423 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6424 NeedsBarrier, ChunkSize);
6426 if (DistScheduleChunkSize)
6427 return applyStaticChunkedWorkshareLoop(
6428 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6429 DistScheduleChunkSize, DistScheduleSchedType);
6430 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6433 case OMPScheduleType::BaseStaticChunked:
6434 case OMPScheduleType::BaseDistributeChunked:
6435 if (IsOrdered && !HasDistSchedule)
6436 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6437 NeedsBarrier, ChunkSize);
6439 return applyStaticChunkedWorkshareLoop(
6440 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6441 DistScheduleChunkSize, DistScheduleSchedType);
6443 case OMPScheduleType::BaseRuntime:
6444 case OMPScheduleType::BaseAuto:
6445 case OMPScheduleType::BaseGreedy:
6446 case OMPScheduleType::BaseBalanced:
6447 case OMPScheduleType::BaseSteal:
6448 case OMPScheduleType::BaseRuntimeSimd:
6450 "schedule type does not support user-defined chunk sizes");
6452 case OMPScheduleType::BaseGuidedSimd:
6453 case OMPScheduleType::BaseDynamicChunked:
6454 case OMPScheduleType::BaseGuidedChunked:
6455 case OMPScheduleType::BaseGuidedIterativeChunked:
6456 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6457 case OMPScheduleType::BaseStaticBalancedChunked:
6458 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6459 NeedsBarrier, ChunkSize);
6472 unsigned Bitwidth = Ty->getIntegerBitWidth();
6475 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6478 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6486static FunctionCallee
6488 unsigned Bitwidth = Ty->getIntegerBitWidth();
6491 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6494 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6501static FunctionCallee
6503 unsigned Bitwidth = Ty->getIntegerBitWidth();
6506 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6509 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6514OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6517 bool NeedsBarrier,
Value *Chunk) {
6518 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6520 "Require dedicated allocate IP");
6522 "Require valid schedule type");
6524 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6525 OMPScheduleType::ModifierOrdered;
6530 uint32_t SrcLocStrSize;
6537 Type *IVTy =
IV->getType();
6542 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6544 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6545 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6546 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6547 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6556 Constant *One = ConstantInt::get(IVTy, 1);
6557 Builder.CreateStore(One, PLowerBound);
6559 Builder.CreateStore(UpperBound, PUpperBound);
6560 Builder.CreateStore(One, PStride);
6578 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6590 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6593 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6594 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6597 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6598 Builder.CreateCondBr(MoreWork, Header, Exit);
6604 PI->setIncomingBlock(0, OuterCond);
6605 PI->setIncomingValue(0, LowerBound);
6610 Br->setSuccessor(OuterCond);
6616 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6619 CI->setOperand(1, UpperBound);
6623 assert(BI->getSuccessor(1) == Exit);
6624 BI->setSuccessor(1, OuterCond);
6638 omp::Directive::OMPD_for,
false,
6641 return BarrierIP.takeError();
6693 assert(
Loops.size() >= 1 &&
"At least one loop required");
6694 size_t NumLoops =
Loops.size();
6698 return Loops.front();
6710 Loop->collectControlBlocks(OldControlBBs);
6714 if (ComputeIP.
isSet())
6721 Value *CollapsedTripCount =
nullptr;
6724 "All loops to collapse must be valid canonical loops");
6725 Value *OrigTripCount = L->getTripCount();
6726 if (!CollapsedTripCount) {
6727 CollapsedTripCount = OrigTripCount;
6732 CollapsedTripCount =
6733 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6739 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6745 Builder.restoreIP(Result->getBodyIP());
6747 Value *Leftover = Result->getIndVar();
6749 NewIndVars.
resize(NumLoops);
6750 for (
int i = NumLoops - 1; i >= 1; --i) {
6751 Value *OrigTripCount =
Loops[i]->getTripCount();
6753 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6754 NewIndVars[i] = NewIndVar;
6756 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6759 NewIndVars[0] = Leftover;
6768 BasicBlock *ContinueBlock = Result->getBody();
6770 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6777 ContinueBlock =
nullptr;
6778 ContinuePred = NextSrc;
6785 for (
size_t i = 0; i < NumLoops - 1; ++i)
6786 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6792 for (
size_t i = NumLoops - 1; i > 0; --i)
6793 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6796 ContinueWith(Result->getLatch(),
nullptr);
6803 for (
size_t i = 0; i < NumLoops; ++i)
6804 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6818std::vector<CanonicalLoopInfo *>
6822 "Must pass as many tile sizes as there are loops");
6823 int NumLoops =
Loops.size();
6824 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6836 Loop->collectControlBlocks(OldControlBBs);
6844 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6845 OrigTripCounts.
push_back(L->getTripCount());
6856 for (
int i = 0; i < NumLoops - 1; ++i) {
6869 for (
int i = 0; i < NumLoops; ++i) {
6871 Value *OrigTripCount = OrigTripCounts[i];
6884 Value *FloorTripOverflow =
6885 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6887 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6888 Value *FloorTripCount =
6889 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6890 "omp_floor" +
Twine(i) +
".tripcount",
true);
6893 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6899 std::vector<CanonicalLoopInfo *> Result;
6900 Result.reserve(NumLoops * 2);
6913 auto EmbeddNewLoop =
6914 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6917 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6922 Enter = EmbeddedLoop->
getBody();
6924 OutroInsertBefore = EmbeddedLoop->
getLatch();
6925 return EmbeddedLoop;
6929 const Twine &NameBase) {
6932 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6933 Result.push_back(EmbeddedLoop);
6937 EmbeddNewLoops(FloorCount,
"floor");
6943 for (
int i = 0; i < NumLoops; ++i) {
6947 Value *FloorIsEpilogue =
6949 Value *TileTripCount =
6956 EmbeddNewLoops(TileCounts,
"tile");
6961 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6970 BodyEnter =
nullptr;
6971 BodyEntered = ExitBB;
6983 Builder.restoreIP(Result.back()->getBodyIP());
6984 for (
int i = 0; i < NumLoops; ++i) {
6987 Value *OrigIndVar = OrigIndVars[i];
7015 if (Properties.
empty())
7038 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
7042 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
7050 if (
I.mayReadOrWriteMemory()) {
7054 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
7068 Loop->collectControlBlocks(oldControlBBs);
7073 assert(L->isValid() &&
"All input loops must be valid canonical loops");
7074 origTripCounts.
push_back(L->getTripCount());
7083 Builder.SetInsertPoint(TCBlock);
7084 Value *fusedTripCount =
nullptr;
7086 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
7087 Value *origTripCount = L->getTripCount();
7088 if (!fusedTripCount) {
7089 fusedTripCount = origTripCount;
7092 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
7093 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
7107 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7108 Loops[i]->getPreheader()->moveBefore(TCBlock);
7109 Loops[i]->getAfter()->moveBefore(TCBlock);
7113 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7125 for (
size_t i = 0; i <
Loops.size(); ++i) {
7127 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
7128 Builder.SetInsertPoint(condBlock);
7136 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7137 Builder.SetInsertPoint(condBBs[i]);
7138 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
7154 "omp.fused.pre_latch");
7187 const Twine &NamePrefix) {
7216 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
7218 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
7221 Builder.SetInsertPoint(SplitBeforeIt);
7223 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
7226 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
7229 Builder.SetInsertPoint(ElseBlock);
7235 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
7237 ExistingBlocks.
append(L->block_begin(), L->block_end());
7243 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
7245 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
7252 if (
Block == ThenBlock)
7253 NewBB->
setName(NamePrefix +
".if.else");
7256 VMap[
Block] = NewBB;
7264 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
7265 NamePrefix +
".pre_latch");
7269 L->addBasicBlockToLoop(ThenBlock, LI);
7275 if (TargetTriple.
isX86()) {
7276 if (Features.
lookup(
"avx512f"))
7278 else if (Features.
lookup(
"avx"))
7282 if (TargetTriple.
isPPC())
7284 if (TargetTriple.
isWasm())
7291 Value *IfCond, OrderKind Order,
7301 if (!BB.hasTerminator())
7317 I->eraseFromParent();
7320 if (AlignedVars.
size()) {
7322 for (
auto &AlignedItem : AlignedVars) {
7323 Value *AlignedPtr = AlignedItem.first;
7324 Value *Alignment = AlignedItem.second;
7327 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
7335 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
7348 Reachable.insert(
Block);
7358 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
7374 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
7376 if (Simdlen || Safelen) {
7380 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7406static std::unique_ptr<TargetMachine>
7410 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7411 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7422 std::nullopt, OptLevel));
7440 if (!BB.hasTerminator())
7453 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7454 FAM.registerPass([&]() {
return TIRA; });
7468 I->eraseFromParent();
7471 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7476 nullptr, ORE,
static_cast<int>(OptLevel),
7497 <<
" Threshold=" << UP.
Threshold <<
"\n"
7500 <<
" PartialOptSizeThreshold="
7520 Ptr = Load->getPointerOperand();
7522 Ptr = Store->getPointerOperand();
7529 if (Alloca->getParent() == &
F->getEntryBlock())
7549 int MaxTripCount = 0;
7550 bool MaxOrZero =
false;
7551 unsigned TripMultiple = 0;
7554 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7555 unsigned Factor = UP.
Count;
7556 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7567 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7583 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7596 *UnrolledCLI =
Loop;
7601 "unrolling only makes sense with a factor of 2 or larger");
7603 Type *IndVarTy =
Loop->getIndVarType();
7610 std::vector<CanonicalLoopInfo *>
LoopNest =
7625 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7628 (*UnrolledCLI)->assertOK();
7646 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7665 if (!CPVars.
empty()) {
7670 Directive OMPD = Directive::OMPD_single;
7675 Value *Args[] = {Ident, ThreadId};
7684 if (
Error Err = FiniCB(IP))
7705 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7712 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7715 ConstantInt::get(Int64, 0), CPVars[
I],
7718 }
else if (!IsNowait) {
7721 omp::Directive::OMPD_unknown,
false,
7739 Directive::OMPD_scope,
nullptr,
nullptr,
7740 BodyGenCB, FiniCB,
false,
true,
7748 omp::Directive::OMPD_unknown,
7764 Directive OMPD = Directive::OMPD_critical;
7769 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7770 Value *Args[] = {Ident, ThreadId, LockVar};
7787 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7795 const Twine &Name,
bool IsDependSource) {
7799 "OpenMP runtime requires depend vec with i64 type");
7812 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7826 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7844 Directive OMPD = Directive::OMPD_ordered;
7853 Value *Args[] = {Ident, ThreadId};
7863 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7870 bool HasFinalize,
bool IsCancellable) {
7877 BasicBlock *EntryBB = Builder.GetInsertBlock();
7886 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7898 "Unexpected control flow graph state!!");
7900 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7902 return AfterIP.takeError();
7907 "Unexpected Insertion point location!");
7910 auto InsertBB = merged ? ExitPredBB : ExitBB;
7913 Builder.SetInsertPoint(InsertBB);
7915 return Builder.saveIP();
7919 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7921 if (!Conditional || !EntryCall)
7927 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7937 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7941 UI->eraseFromParent();
7949 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7957 "Unexpected finalization stack state!");
7960 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7962 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7963 return std::move(Err);
7967 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7977 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
8011 "copyin.not.master.end");
8018 Builder.SetInsertPoint(OMP_Entry);
8021 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
8022 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
8024 Builder.SetInsertPoint(CopyBegin);
8042 Value *Args[] = {ThreadId,
Size, Allocator};
8065 return Builder.CreateCall(Fn, Args, Name);
8079 Value *Args[] = {ThreadId, Addr, Allocator};
8086 const Twine &Name) {
8094 M.getContext(),
M.getDataLayout().getPrefTypeAlign(Int64)));
8100 const Twine &Name) {
8102 Loc,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)), Name);
8107 const Twine &Name) {
8113 return Builder.CreateCall(Fn, Args, Name);
8118 const Twine &Name) {
8120 Loc, Addr,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)),
8127 Value *DependenceAddress,
bool HaveNowaitClause) {
8135 if (Device ==
nullptr)
8137 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
8138 if (NumDependences ==
nullptr) {
8139 NumDependences = ConstantInt::get(Int32, 0);
8143 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8145 Ident, ThreadId, InteropVar, InteropTypeVal,
8146 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
8155 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
8163 if (Device ==
nullptr)
8165 if (NumDependences ==
nullptr) {
8166 NumDependences = ConstantInt::get(Int32, 0);
8170 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8172 Ident, ThreadId, InteropVar, Device,
8173 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8182 Value *NumDependences,
8183 Value *DependenceAddress,
8184 bool HaveNowaitClause) {
8191 if (Device ==
nullptr)
8193 if (NumDependences ==
nullptr) {
8194 NumDependences = ConstantInt::get(Int32, 0);
8198 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8200 Ident, ThreadId, InteropVar, Device,
8201 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8231 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
8232 "expected num_threads and num_teams to be specified");
8252 const std::string DebugPrefix =
"_debug__";
8253 if (KernelName.
ends_with(DebugPrefix)) {
8254 KernelName = KernelName.
drop_back(DebugPrefix.length());
8255 Kernel =
M.getFunction(KernelName);
8261 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
8266 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
8273 MaxThreadsVal = Attrs.MinThreads;
8277 if (MaxThreadsVal > 0)
8288 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
8291 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
8292 Constant *DynamicEnvironmentInitializer =
8296 DynamicEnvironmentInitializer, DynamicEnvironmentName,
8298 DL.getDefaultGlobalsAddressSpace());
8302 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
8303 ? DynamicEnvironmentGV
8305 DynamicEnvironmentPtr);
8308 ConfigurationEnvironment, {
8309 UseGenericStateMachineVal,
8310 MayUseNestedParallelismVal,
8319 KernelEnvironment, {
8320 ConfigurationEnvironmentInitializer,
8324 std::string KernelEnvironmentName =
8325 (KernelName +
"_kernel_environment").str();
8328 KernelEnvironmentInitializer, KernelEnvironmentName,
8330 DL.getDefaultGlobalsAddressSpace());
8334 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
8335 ? KernelEnvironmentGV
8337 KernelEnvironmentPtr);
8338 Value *KernelLaunchEnvironment =
8341 KernelLaunchEnvironment =
8342 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
8343 ? KernelLaunchEnvironment
8344 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
8345 KernelLaunchEnvParamTy);
8347 Fn, {KernelEnvironment, KernelLaunchEnvironment});
8359 auto *UI =
Builder.CreateUnreachable();
8365 Builder.SetInsertPoint(WorkerExitBB);
8369 Builder.SetInsertPoint(CheckBBTI);
8370 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
8372 CheckBBTI->eraseFromParent();
8373 UI->eraseFromParent();
8381 int32_t TeamsReductionDataSize) {
8386 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
8390 if (!TeamsReductionDataSize)
8396 const std::string DebugPrefix =
"_debug__";
8398 KernelName = KernelName.
drop_back(DebugPrefix.length());
8399 auto *KernelEnvironmentGV =
8400 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
8401 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
8402 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
8404 KernelEnvironmentInitializer,
8405 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
8406 KernelEnvironmentGV->setInitializer(NewInitializer);
8411 if (
Kernel.hasFnAttribute(Name)) {
8412 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
8418std::pair<int32_t, int32_t>
8420 int32_t ThreadLimit =
8421 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
8424 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
8425 if (!Attr.isValid() || !Attr.isStringAttribute())
8426 return {0, ThreadLimit};
8427 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
8430 return {0, ThreadLimit};
8431 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
8439 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
8441 return {0, ThreadLimit};
8447 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
8450 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
8458std::pair<int32_t, int32_t>
8461 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
8465 int32_t LB, int32_t UB) {
8473 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
8476void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
8485 else if (
T.isNVPTX())
8487 else if (
T.isSPIRV())
8492Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8493 StringRef EntryFnIDName) {
8494 if (
Config.isTargetDevice()) {
8495 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8499 return new GlobalVariable(
8504Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8505 StringRef EntryFnName) {
8509 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8510 "Named kernel already exists?");
8511 return new GlobalVariable(
8524 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8528 OutlinedFn = *CBResult;
8530 OutlinedFn =
nullptr;
8536 if (!IsOffloadEntry)
8539 std::string EntryFnIDName =
8541 ? std::string(EntryFnName)
8545 EntryFnName, EntryFnIDName);
8553 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8554 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8555 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8557 EntryInfo, EntryAddr, OutlinedFnID,
8559 return OutlinedFnID;
8577 bool IsStandAlone = !BodyGenCB;
8584 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8586 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8587 true, DeviceAddrCB))
8594 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8604 SrcLocInfo, DeviceID,
8611 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8615 if (Info.HasNoWait) {
8625 if (Info.HasNoWait) {
8629 emitBlock(OffloadContBlock, CurFn,
true);
8635 bool RequiresOuterTargetTask = Info.HasNoWait;
8636 if (!RequiresOuterTargetTask)
8637 cantFail(TaskBodyCB(
nullptr,
nullptr,
8641 {}, RTArgs, Info.HasNoWait));
8644 omp::OMPRTL___tgt_target_data_begin_mapper);
8648 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8652 Builder.CreateStore(LI, DeviceMap.second.second);
8689 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8698 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8721 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8722 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8737 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8738 return EndThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8741 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8742 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8753 bool IsGPUDistribute) {
8754 assert((IVSize == 32 || IVSize == 64) &&
8755 "IV size is not compatible with the omp runtime");
8757 if (IsGPUDistribute)
8759 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8760 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8761 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8762 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8764 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8765 : omp::OMPRTL___kmpc_for_static_init_4u)
8766 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8767 : omp::OMPRTL___kmpc_for_static_init_8u);
8774 assert((IVSize == 32 || IVSize == 64) &&
8775 "IV size is not compatible with the omp runtime");
8777 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8778 : omp::OMPRTL___kmpc_dispatch_init_4u)
8779 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8780 : omp::OMPRTL___kmpc_dispatch_init_8u);
8787 assert((IVSize == 32 || IVSize == 64) &&
8788 "IV size is not compatible with the omp runtime");
8790 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8791 : omp::OMPRTL___kmpc_dispatch_next_4u)
8792 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8793 : omp::OMPRTL___kmpc_dispatch_next_8u);
8800 assert((IVSize == 32 || IVSize == 64) &&
8801 "IV size is not compatible with the omp runtime");
8803 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8804 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8805 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8806 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8817 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8825 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8829 if (NewVar && (arg == NewVar->
getArg()))
8839 auto UpdateDebugRecord = [&](
auto *DR) {
8842 for (
auto Loc : DR->location_ops()) {
8843 auto Iter = ValueReplacementMap.find(
Loc);
8844 if (Iter != ValueReplacementMap.end()) {
8845 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8846 ArgNo = std::get<1>(Iter->second) + 1;
8850 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8855 if (DVR->getNumVariableLocationOps() != 1u) {
8856 DVR->setKillLocation();
8859 Value *
Loc = DVR->getVariableLocationOp(0u);
8866 RequiredBB = &DVR->getFunction()->getEntryBlock();
8868 if (RequiredBB && RequiredBB != CurBB) {
8880 "Unexpected debug intrinsic");
8882 UpdateDebugRecord(&DVR);
8883 MoveDebugRecordToCorrectBlock(&DVR);
8886 for (
auto *DVR : DVRsToDelete)
8887 DVR->getMarker()->MarkedInstr->dropOneDbgRecord(DVR);
8891 Module *M = Func->getParent();
8894 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8895 unsigned ArgNo = Func->arg_size();
8897 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8898 false, DINode::DIFlags::FlagArtificial);
8900 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8901 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8922 for (
auto &Arg : Inputs)
8923 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8927 for (
auto &Arg : Inputs)
8928 ParameterTypes.
push_back(Arg->getType());
8936 auto BB = Builder.GetInsertBlock();
8937 auto M = BB->getModule();
8948 if (TargetCpuAttr.isStringAttribute())
8949 Func->addFnAttr(TargetCpuAttr);
8951 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8952 if (TargetFeaturesAttr.isStringAttribute())
8953 Func->addFnAttr(TargetFeaturesAttr);
8958 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8969 Builder.SetInsertPoint(EntryBB);
8975 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8985 splitBB(Builder,
true,
"outlined.body");
8992 Builder.SetInsertPoint(ExitBB);
8999 Builder.CreateRetVoid();
9003 auto AllocaIP = Builder.saveIP();
9008 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
9040 if (Instr->getFunction() == Func)
9041 Instr->replaceUsesOfWith(
Input, InputCopy);
9047 for (
auto InArg :
zip(Inputs, ArgRange)) {
9049 Argument &Arg = std::get<1>(InArg);
9050 Value *InputCopy =
nullptr;
9053 Arg,
Input, InputCopy, AllocaIP, Builder.saveIP(),
9057 Builder.restoreIP(*AfterIP);
9058 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
9078 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
9085 ReplaceValue(
Input, InputCopy, Func);
9089 for (
auto Deferred : DeferredReplacement)
9090 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
9093 ValueReplacementMap);
9101 Value *TaskWithPrivates,
9102 Type *TaskWithPrivatesTy) {
9104 Type *TaskTy = OMPIRBuilder.Task;
9107 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
9108 Value *Shareds = TaskT;
9118 if (TaskWithPrivatesTy != TaskTy)
9119 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
9136 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
9141 assert((!NumOffloadingArrays || PrivatesTy) &&
9142 "PrivatesTy cannot be nullptr when there are offloadingArrays"
9175 Type *TaskPtrTy = OMPBuilder.TaskPtr;
9176 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
9182 ".omp_target_task_proxy_func",
9183 Builder.GetInsertBlock()->getModule());
9184 Value *ThreadId = ProxyFn->getArg(0);
9185 Value *TaskWithPrivates = ProxyFn->getArg(1);
9186 ThreadId->
setName(
"thread.id");
9187 TaskWithPrivates->
setName(
"task");
9189 bool HasShareds = SharedArgsOperandNo > 0;
9190 bool HasOffloadingArrays = NumOffloadingArrays > 0;
9193 Builder.SetInsertPoint(EntryBB);
9199 if (HasOffloadingArrays) {
9200 assert(TaskTy != TaskWithPrivatesTy &&
9201 "If there are offloading arrays to pass to the target"
9202 "TaskTy cannot be the same as TaskWithPrivatesTy");
9205 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
9206 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
9208 Builder.CreateStructGEP(PrivatesTy, Privates, i));
9212 auto *ArgStructAlloca =
9214 assert(ArgStructAlloca &&
9215 "Unable to find the alloca instruction corresponding to arguments "
9216 "for extracted function");
9218 std::optional<TypeSize> ArgAllocSize =
9220 assert(ArgStructType && ArgAllocSize &&
9221 "Unable to determine size of arguments for extracted function");
9222 uint64_t StructSize = ArgAllocSize->getFixedValue();
9225 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
9227 Value *SharedsSize = Builder.getInt64(StructSize);
9230 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
9232 Builder.CreateMemCpy(
9233 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
9235 KernelLaunchArgs.
push_back(NewArgStructAlloca);
9238 Builder.CreateRetVoid();
9244 return GEP->getSourceElementType();
9246 return Alloca->getAllocatedType();
9269 if (OffloadingArraysToPrivatize.
empty())
9270 return OMPIRBuilder.Task;
9273 for (
Value *V : OffloadingArraysToPrivatize) {
9274 assert(V->getType()->isPointerTy() &&
9275 "Expected pointer to array to privatize. Got a non-pointer value "
9278 assert(ArrayTy &&
"ArrayType cannot be nullptr");
9284 "struct.task_with_privates");
9298 EntryFnName, Inputs, CBFunc,
9303 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
9440 TargetTaskAllocaBB->
begin());
9443 auto OI = std::make_unique<OutlineInfo>();
9444 OI->EntryBB = TargetTaskAllocaBB;
9445 OI->OuterAllocBB = AllocaIP.
getBlock();
9450 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
9453 Builder.restoreIP(TargetTaskBodyIP);
9454 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
9472 bool NeedsTargetTask = HasNoWait && DeviceID;
9473 if (NeedsTargetTask) {
9479 OffloadingArraysToPrivatize.
push_back(V);
9480 OI->ExcludeArgsFromAggregate.push_back(V);
9484 OI->PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
9485 DeviceID, OffloadingArraysToPrivatize](
9488 "there must be a single user for the outlined function");
9502 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
9503 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
9505 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
9506 "Wrong number of arguments for StaleCI when shareds are present");
9507 int SharedArgOperandNo =
9508 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
9514 if (!OffloadingArraysToPrivatize.
empty())
9519 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
9520 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
9522 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9525 Builder.SetInsertPoint(StaleCI);
9542 OMPRTL___kmpc_omp_target_task_alloc);
9554 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9561 auto *ArgStructAlloca =
9563 assert(ArgStructAlloca &&
9564 "Unable to find the alloca instruction corresponding to arguments "
9565 "for extracted function");
9566 std::optional<TypeSize> ArgAllocSize =
9569 "Unable to determine size of arguments for extracted function");
9570 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9589 TaskSize, SharedsSize,
9592 if (NeedsTargetTask) {
9593 assert(DeviceID &&
"Expected non-empty device ID.");
9603 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9604 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9607 if (!OffloadingArraysToPrivatize.
empty()) {
9609 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9610 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9611 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9618 "ElementType should match ArrayType");
9621 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9623 Dst, Alignment, PtrToPrivatize, Alignment,
9624 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9628 Value *DepArray =
nullptr;
9629 Value *NumDeps =
nullptr;
9632 NumDeps = Dependencies.
NumDeps;
9633 }
else if (!Dependencies.
Deps.empty()) {
9635 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
9646 if (!NeedsTargetTask) {
9655 ConstantInt::get(
Builder.getInt32Ty(), 0),
9668 }
else if (DepArray) {
9676 {Ident, ThreadID, TaskData, NumDeps, DepArray,
9677 ConstantInt::get(
Builder.getInt32Ty(), 0),
9687 I->eraseFromParent();
9692 << *(
Builder.GetInsertBlock()) <<
"\n");
9694 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9706 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9729 Builder.restoreIP(IP);
9735 return Builder.saveIP();
9738 bool HasDependencies = !Dependencies.
empty();
9739 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9756 if (OutlinedFnID && DeviceID)
9758 EmitTargetCallFallbackCB, KArgs,
9759 DeviceID, RTLoc, TargetTaskAllocaIP);
9767 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9774 auto &&EmitTargetCallElse =
9781 if (RequiresOuterTargetTask) {
9788 Dependencies, EmptyRTArgs, HasNoWait);
9790 return EmitTargetCallFallbackCB(Builder.saveIP());
9793 Builder.restoreIP(AfterIP);
9797 auto &&EmitTargetCallThen =
9801 Info.HasNoWait = HasNoWait;
9806 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9812 for (
auto [DefaultVal, RuntimeVal] :
9814 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9815 : Builder.getInt32(DefaultVal));
9819 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9821 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9825 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9828 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9836 Value *MaxThreadsClause =
9838 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9841 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9843 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9844 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9846 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9847 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9849 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9852 unsigned NumTargetItems = Info.NumberOfPtrs;
9860 Builder.getInt64Ty(),
9862 : Builder.getInt64(0);
9866 DynCGroupMem = Builder.getInt32(0);
9869 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9870 HasNoWait,
false, DynCGroupMemFallback);
9877 if (RequiresOuterTargetTask)
9879 RTLoc, AllocaIP, Dependencies,
9880 KArgs.
RTArgs, Info.HasNoWait);
9883 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9884 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9887 Builder.restoreIP(AfterIP);
9894 if (!OutlinedFnID) {
9895 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP(), DeallocBlocks));
9901 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP(), DeallocBlocks));
9906 EmitTargetCallElse, AllocaIP));
9919 bool HasNowait,
Value *DynCGroupMem,
9933 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9934 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9940 if (!
Config.isTargetDevice())
9942 RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs,
9943 GenMapInfoCB, CustomMapperCB, Dependencies, HasNowait,
9944 DynCGroupMem, DynCGroupMemFallback);
9958 return OS.
str().str();
9963 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9969 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9971 assert(Elem.second->getValueType() == Ty &&
9972 "OMP internal variable has different type than requested");
9985 :
M.getTargetTriple().isAMDGPU()
9987 :
DL.getDefaultGlobalsAddressSpace();
9996 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9997 GV->setAlignment(std::max(TypeAlign, PtrAlign));
10001 return Elem.second;
10004Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
10005 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
10006 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
10017 return SizePtrToInt;
10022 std::string VarName) {
10030 return MaptypesArrayGlobal;
10035 unsigned NumOperands,
10044 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
10048 ArrI64Ty,
nullptr,
".offload_sizes");
10059 int64_t DeviceID,
unsigned NumOperands) {
10065 Value *ArgsBaseGEP =
10067 {Builder.getInt32(0), Builder.getInt32(0)});
10070 {Builder.getInt32(0), Builder.getInt32(0)});
10071 Value *ArgSizesGEP =
10073 {Builder.getInt32(0), Builder.getInt32(0)});
10077 Builder.getInt32(NumOperands),
10078 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
10079 MaptypesArg, MapnamesArg, NullPtr});
10086 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
10087 "expected region end call to runtime only when end call is separate");
10089 auto VoidPtrTy = UnqualPtrTy;
10090 auto VoidPtrPtrTy = UnqualPtrTy;
10092 auto Int64PtrTy = UnqualPtrTy;
10094 if (!Info.NumberOfPtrs) {
10106 Info.RTArgs.BasePointersArray,
10109 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
10113 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10117 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
10118 : Info.RTArgs.MapTypesArray,
10124 if (!Info.EmitDebug)
10128 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
10133 if (!Info.HasMapper)
10137 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
10158 "struct.descriptor_dim");
10160 enum { OffsetFD = 0, CountFD, StrideFD };
10164 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
10167 if (NonContigInfo.
Dims[
I] == 1)
10172 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
10173 Builder.restoreIP(CodeGenIP);
10174 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
10175 unsigned RevIdx = EE -
II - 1;
10179 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
10181 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
10182 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
10184 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
10186 NonContigInfo.
Counts[L][RevIdx], CountLVal,
10187 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10189 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
10191 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
10192 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10195 Builder.restoreIP(CodeGenIP);
10196 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
10197 DimsAddr,
Builder.getPtrTy());
10200 Info.RTArgs.PointersArray, 0,
I);
10202 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
10207void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
10211 StringRef Prefix = IsInit ?
".init" :
".del";
10217 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
10218 Value *DeleteBit = Builder.CreateAnd(
10221 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10222 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
10227 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
10228 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
10229 DeleteCond = Builder.CreateIsNull(
10234 DeleteCond =
Builder.CreateIsNotNull(
10250 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10251 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10252 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10253 MapTypeArg =
Builder.CreateOr(
10256 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10257 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
10261 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
10262 ArraySize, MapTypeArg, MapName};
10273 bool PreserveMemberOfFlags) {
10289 MapperFn->
addFnAttr(Attribute::NoInline);
10290 MapperFn->
addFnAttr(Attribute::NoUnwind);
10300 auto SavedIP =
Builder.saveIP();
10301 Builder.SetInsertPoint(EntryBB);
10313 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
10315 Value *PtrBegin = BeginIn;
10321 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10322 MapType, MapName, ElementSize, HeadBB,
10333 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
10334 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10340 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
10341 PtrPHI->addIncoming(PtrBegin, HeadBB);
10346 return Info.takeError();
10350 Value *OffloadingArgs[] = {MapperHandle};
10354 Value *ShiftedPreviousSize =
10358 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
10359 Value *CurBaseArg = Info->BasePointers[
I];
10360 Value *CurBeginArg = Info->Pointers[
I];
10361 Value *CurSizeArg = Info->Sizes[
I];
10362 Value *CurNameArg = Info->Names.size()
10368 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10370 Value *MemberMapType;
10371 if (PreserveMemberOfFlags) {
10373 static_cast<uint64_t>(OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
10375 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10377 bool HasMemberOf = (OrigFlags & MemberOfMask) != 0;
10379 MemberMapType =
Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10381 MemberMapType = OriMapType;
10383 MemberMapType =
Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10401 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10402 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10403 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10413 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10419 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10420 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10421 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10427 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10428 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10429 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10435 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10436 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10442 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10443 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10444 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10450 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10451 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10460 CurMapType->
addIncoming(MemberMapType, ToElseBB);
10462 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
10463 CurSizeArg, CurMapType, CurNameArg};
10465 auto ChildMapperFn = CustomMapperCB(
I);
10466 if (!ChildMapperFn)
10467 return ChildMapperFn.takeError();
10468 if (*ChildMapperFn) {
10483 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
10484 "omp.arraymap.next");
10485 PtrPHI->addIncoming(PtrNext, LastBB);
10486 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
10488 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10493 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10494 MapType, MapName, ElementSize, DoneBB,
10508 bool IsNonContiguous,
10512 Info.clearArrayInfo();
10515 if (Info.NumberOfPtrs == 0)
10524 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
10525 PointerArrayType,
nullptr,
".offload_baseptrs");
10527 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
10528 PointerArrayType,
nullptr,
".offload_ptrs");
10530 PointerArrayType,
nullptr,
".offload_mappers");
10531 Info.RTArgs.MappersArray = MappersArray;
10538 ConstantInt::get(Int64Ty, 0));
10540 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
10541 bool IsNonContigEntry =
10543 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10545 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10548 if (IsNonContigEntry) {
10550 "Index must be in-bounds for NON_CONTIG Dims array");
10552 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10553 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10558 ConstSizes[
I] = CI;
10562 RuntimeSizes.
set(
I);
10565 if (RuntimeSizes.
all()) {
10567 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10568 SizeArrayType,
nullptr,
".offload_sizes");
10574 auto *SizesArrayGbl =
10579 if (!RuntimeSizes.
any()) {
10580 Info.RTArgs.SizesArray = SizesArrayGbl;
10582 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10583 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10586 SizeArrayType,
nullptr,
".offload_sizes");
10590 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10591 SizesArrayGbl, OffloadSizeAlign,
10596 Info.RTArgs.SizesArray = Buffer;
10604 for (
auto mapFlag : CombinedInfo.
Types)
10606 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10610 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10616 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10617 Info.EmitDebug =
true;
10619 Info.RTArgs.MapNamesArray =
10621 Info.EmitDebug =
false;
10626 if (Info.separateBeginEndCalls()) {
10627 bool EndMapTypesDiffer =
false;
10629 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10630 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10631 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10632 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10633 EndMapTypesDiffer =
true;
10636 if (EndMapTypesDiffer) {
10638 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10643 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10646 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10648 Builder.CreateAlignedStore(BPVal, BP,
10649 M.getDataLayout().getPrefTypeAlign(PtrTy));
10651 if (Info.requiresDevicePointerInfo()) {
10653 CodeGenIP =
Builder.saveIP();
10655 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10656 Builder.restoreIP(CodeGenIP);
10658 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10660 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10662 DeviceAddrCB(
I, BP);
10668 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10671 Builder.CreateAlignedStore(PVal,
P,
10672 M.getDataLayout().getPrefTypeAlign(PtrTy));
10674 if (RuntimeSizes.
test(
I)) {
10676 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10682 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10685 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10688 auto CustomMFunc = CustomMapperCB(
I);
10690 return CustomMFunc.takeError();
10692 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10695 PointerArrayType, MappersArray,
10698 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10702 Info.NumberOfPtrs == 0)
10719 Builder.ClearInsertionPoint();
10750 auto CondConstant = CI->getSExtValue();
10752 return ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10754 return ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10764 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10767 if (
Error Err = ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10773 if (
Error Err = ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10782bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10786 "Unexpected Atomic Ordering.");
10788 bool Flush =
false;
10850 assert(
X.Var->getType()->isPointerTy() &&
10851 "OMP Atomic expects a pointer to target memory");
10852 Type *XElemTy =
X.ElemTy;
10855 "OMP atomic read expected a scalar type");
10857 Value *XRead =
nullptr;
10861 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10870 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10873 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10875 XRead = AtomicLoadRes.first;
10882 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10885 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10887 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10890 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10891 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10902 assert(
X.Var->getType()->isPointerTy() &&
10903 "OMP Atomic expects a pointer to target memory");
10904 Type *XElemTy =
X.ElemTy;
10907 "OMP atomic write expected a scalar type");
10915 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10918 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10926 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10931 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10938 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10939 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10945 Type *XTy =
X.Var->getType();
10947 "OMP Atomic expects a pointer to target memory");
10948 Type *XElemTy =
X.ElemTy;
10951 "OMP atomic update expected a scalar or struct type");
10954 "OpenMP atomic does not support LT or GT operations");
10958 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10959 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10961 return AtomicResult.takeError();
10962 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10967Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10971 return Builder.CreateAdd(Src1, Src2);
10973 return Builder.CreateSub(Src1, Src2);
10975 return Builder.CreateAnd(Src1, Src2);
10977 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10979 return Builder.CreateOr(Src1, Src2);
10981 return Builder.CreateXor(Src1, Src2);
11020Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
11023 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
11024 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
11026 bool emitRMWOp =
false;
11034 emitRMWOp = XElemTy;
11037 emitRMWOp = (IsXBinopExpr && XElemTy);
11044 std::pair<Value *, Value *> Res;
11046 AtomicRMWInst *RMWInst =
11047 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
11048 if (
T.isAMDGPU()) {
11049 if (IsIgnoreDenormalMode)
11050 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
11052 if (!IsFineGrainedMemory)
11053 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
11055 if (!IsRemoteMemory)
11059 Res.first = RMWInst;
11064 Res.second = Res.first;
11066 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
11069 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
11075 OpenMPIRBuilder::AtomicInfo atomicInfo(
11077 OldVal->
getAlign(),
true , AllocaIP,
X);
11078 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
11081 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11088 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
11089 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
11090 Builder.SetInsertPoint(ContBB);
11092 PHI->addIncoming(AtomicLoadRes.first, CurBB);
11094 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
11097 Value *Upd = *CBResult;
11098 Builder.CreateStore(Upd, NewAtomicAddr);
11101 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
11102 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
11103 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
11104 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
11107 Res.first = OldExprVal;
11110 if (UnreachableInst *ExitTI =
11113 Builder.SetInsertPoint(ExitBB);
11115 Builder.SetInsertPoint(ExitTI);
11118 IntegerType *IntCastTy =
11121 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
11131 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11138 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
11139 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
11140 Builder.SetInsertPoint(ContBB);
11142 PHI->addIncoming(OldVal, CurBB);
11147 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
11148 X->getName() +
".atomic.fltCast");
11150 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
11151 X->getName() +
".atomic.ptrCast");
11155 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
11158 Value *Upd = *CBResult;
11159 Builder.CreateStore(Upd, NewAtomicAddr);
11160 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
11164 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
11165 Result->setVolatile(VolatileX);
11166 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
11167 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
11168 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
11169 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
11171 Res.first = OldExprVal;
11175 if (UnreachableInst *ExitTI =
11178 Builder.SetInsertPoint(ExitBB);
11180 Builder.SetInsertPoint(ExitTI);
11191 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
11192 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
11197 Type *XTy =
X.Var->getType();
11199 "OMP Atomic expects a pointer to target memory");
11200 Type *XElemTy =
X.ElemTy;
11203 "OMP atomic capture expected a scalar or struct type");
11205 "OpenMP atomic does not support LT or GT operations");
11212 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
11213 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
11216 Value *CapturedVal =
11217 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
11218 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
11220 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
11228 bool IsFailOnly,
bool IsWeak) {
11232 IsPostfixUpdate, IsFailOnly, Failure, IsWeak);
11244 assert(
X.Var->getType()->isPointerTy() &&
11245 "OMP atomic expects a pointer to target memory");
11248 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
11249 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
11252 bool IsInteger = E->getType()->isIntegerTy();
11254 if (
Op == OMPAtomicCompareOp::EQ) {
11257 Value *OldValue =
nullptr;
11258 Value *SuccessOrFail =
nullptr;
11296 X.Var->getName() +
".atomic.load");
11302 Value *EIsNaN =
Builder.CreateFCmpUNO(E, E,
"atomic.e.isnan");
11303 Value *XIsNaN =
Builder.CreateFCmpUNO(XFP, XFP,
"atomic.x.isnan");
11304 Value *EitherNaN =
Builder.CreateOr(EIsNaN, XIsNaN,
"atomic.either.nan");
11309 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11313 M.getContext(),
X.Var->getName() +
".atomic.nan",
F, ExitBB);
11315 M.getContext(),
X.Var->getName() +
".atomic.notnan",
F, ExitBB);
11317 M.getContext(),
X.Var->getName() +
".atomic.zero",
F, ExitBB);
11319 M.getContext(),
X.Var->getName() +
".atomic.normal",
F, ExitBB);
11323 Builder.SetInsertPoint(CurBB);
11324 Builder.CreateCondBr(EitherNaN, NaNBB, NotNaNBB);
11327 Builder.SetInsertPoint(NaNBB);
11331 Builder.SetInsertPoint(NotNaNBB);
11334 X.Var->getName() +
".atomic.xiszero");
11336 "atomic.e.iszero");
11337 Value *BothZero =
Builder.CreateAnd(XIsZero, EIsZero,
"atomic.both.zero");
11338 Builder.CreateCondBr(BothZero, ZeroBB, NormalBB);
11341 Builder.SetInsertPoint(ZeroBB);
11343 X.Var, XCurr, DBCast,
MaybeAlign(), AO, Failure);
11345 Value *OldZero =
Builder.CreateExtractValue(ResZero, 0);
11346 Value *OkZero =
Builder.CreateExtractValue(ResZero, 1);
11350 Builder.SetInsertPoint(NormalBB);
11352 X.Var, EBCast, DBCast,
MaybeAlign(), AO, Failure);
11354 Value *OldNormal =
Builder.CreateExtractValue(ResNormal, 0);
11355 Value *OkNormal =
Builder.CreateExtractValue(ResNormal, 1);
11361 Builder.CreatePHI(IntCastTy, 3,
X.Var->getName() +
".atomic.old");
11366 X.Var->getName() +
".atomic.ok");
11373 Builder.SetInsertPoint(ExitBB);
11378 OldValue =
Builder.CreateBitCast(OldIntPHI,
X.ElemTy,
11379 X.Var->getName() +
".atomic.old.fp");
11380 SuccessOrFail = SuccessPHI;
11388 Result =
Builder.CreateAtomicCmpXchg(
X.Var, EBCast, DBCast,
11394 Result->setWeak(IsWeak);
11397 OldValue =
Builder.CreateExtractValue(Result, 0);
11399 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
11401 "OldValue and V must be of same type");
11402 if (IsPostfixUpdate) {
11403 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
11405 SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
11409 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11411 CurBBTI,
X.Var->getName() +
".atomic.exit");
11417 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
11419 Builder.SetInsertPoint(ContBB);
11420 Builder.CreateStore(OldValue, V.Var);
11426 Builder.SetInsertPoint(ExitBB);
11428 Builder.SetInsertPoint(ExitTI);
11431 Value *CapturedValue =
11432 Builder.CreateSelect(SuccessOrFail, E, OldValue);
11433 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11439 assert(R.Var->getType()->isPointerTy() &&
11440 "r.var must be of pointer type");
11441 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
11443 Value *SuccessFailureVal =
11444 Builder.CreateExtractValue(Result, 1);
11445 Value *ResultCast =
11446 R.IsSigned ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
11447 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
11448 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
11457 "OldValue and V must be of same type");
11458 if (IsPostfixUpdate) {
11459 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
11464 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11466 CurBBTI,
X.Var->getName() +
".atomic.exit");
11472 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
11474 Builder.SetInsertPoint(ContBB);
11475 Builder.CreateStore(OldValue, V.Var);
11481 Builder.SetInsertPoint(ExitBB);
11483 Builder.SetInsertPoint(ExitTI);
11486 Value *CapturedValue =
11487 Builder.CreateSelect(SuccessOrFail, E, OldValue);
11488 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11494 assert(R.Var->getType()->isPointerTy() &&
11495 "r.var must be of pointer type");
11496 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
11498 Value *ResultCast = R.IsSigned
11499 ?
Builder.CreateSExt(SuccessOrFail, R.ElemTy)
11500 :
Builder.CreateZExt(SuccessOrFail, R.ElemTy);
11501 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
11505 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
11506 "Op should be either max or min at this point");
11507 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
11518 if (IsXBinopExpr) {
11547 Value *CapturedValue =
nullptr;
11548 if (IsPostfixUpdate) {
11549 CapturedValue = OldValue;
11574 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
11575 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
11577 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11581 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
11601 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
11628 bool SubClausesPresent =
11629 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
11631 if (!
Config.isTargetDevice() && SubClausesPresent) {
11632 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
11633 "if lowerbound is non-null, then upperbound must also be non-null "
11634 "for bounds on num_teams");
11636 if (NumTeamsUpper ==
nullptr)
11637 NumTeamsUpper =
Builder.getInt32(0);
11639 if (NumTeamsLower ==
nullptr)
11640 NumTeamsLower = NumTeamsUpper;
11644 "argument to if clause must be an integer value");
11648 IfExpr =
Builder.CreateICmpNE(IfExpr,
11649 ConstantInt::get(IfExpr->
getType(), 0));
11650 NumTeamsUpper =
Builder.CreateSelect(
11651 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
11654 NumTeamsLower =
Builder.CreateSelect(
11655 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
11658 if (ThreadLimit ==
nullptr)
11659 ThreadLimit =
Builder.getInt32(0);
11663 Value *NumTeamsLowerInt32 =
11665 Value *NumTeamsUpperInt32 =
11667 Value *ThreadLimitInt32 =
11674 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
11675 ThreadLimitInt32});
11680 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11683 auto OI = std::make_unique<OutlineInfo>();
11684 OI->EntryBB = AllocaBB;
11685 OI->ExitBB = ExitBB;
11686 OI->OuterAllocBB = &OuterAllocaBB;
11692 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
11694 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
11696 auto HostPostOutlineCB = [
this, Ident,
11697 ToBeDeleted](
Function &OutlinedFn)
mutable {
11702 "there must be a single user for the outlined function");
11707 "Outlined function must have two or three arguments only");
11709 bool HasShared = OutlinedFn.
arg_size() == 3;
11717 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
11718 "outlined function.");
11719 Builder.SetInsertPoint(StaleCI);
11726 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
11730 I->eraseFromParent();
11733 if (!
Config.isTargetDevice())
11734 OI->PostOutlineCB = HostPostOutlineCB;
11738 Builder.SetInsertPoint(ExitBB);
11751 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11766 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11771 if (
Config.isTargetDevice()) {
11772 auto OI = std::make_unique<OutlineInfo>();
11773 OI->OuterAllocBB = OuterAllocIP.
getBlock();
11774 OI->EntryBB = AllocaBB;
11775 OI->ExitBB = ExitBB;
11776 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
11777 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
11781 Builder.SetInsertPoint(ExitBB);
11788 std::string VarName) {
11797 return MapNamesArrayGlobal;
11802void OpenMPIRBuilder::initializeTypes(
Module &M) {
11806 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11807#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11808#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11809 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11810 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11811#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11812 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11813 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11814#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11815 T = StructType::getTypeByName(Ctx, StructName); \
11817 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11819 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11820#include "llvm/Frontend/OpenMP/OMPKinds.def"
11831 while (!Worklist.
empty()) {
11835 if (
BlockSet.insert(SuccBB).second)
11840std::unique_ptr<CodeExtractor>
11842 bool ArgsInZeroAddressSpace,
11844 return std::make_unique<CodeExtractor>(
11854 Suffix.
str(), ArgsInZeroAddressSpace);
11857std::unique_ptr<CodeExtractor> DeviceSharedMemOutlineInfo::createCodeExtractor(
11859 return std::make_unique<DeviceSharedMemCodeExtractor>(
11860 OMPBuilder, Blocks,
nullptr,
11868 OuterDeallocBBs.empty()
11871 Suffix.
str(), ArgsInZeroAddressSpace);
11881 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11893 Fn->
addFnAttr(
"uniform-work-group-size");
11894 Fn->
addFnAttr(Attribute::MustProgress);
11912 auto &&GetMDInt = [
this](
unsigned V) {
11919 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11920 auto &&TargetRegionMetadataEmitter =
11921 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11936 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11937 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11938 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11939 GetMDInt(E.getOrder())};
11942 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11951 auto &&DeviceGlobalVarMetadataEmitter =
11952 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11962 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11963 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11967 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11974 DeviceGlobalVarMetadataEmitter);
11976 for (
const auto &E : OrderedEntries) {
11977 assert(E.first &&
"All ordered entries must exist!");
11978 if (
const auto *CE =
11981 if (!CE->getID() || !CE->getAddress()) {
11985 if (!
M.getNamedValue(FnName))
11993 }
else if (
const auto *CE =
dyn_cast<
12002 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
12004 if (!CE->getAddress()) {
12009 if (CE->getVarSize() == 0)
12013 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
12014 (!
Config.isTargetDevice() && CE->getAddress())) &&
12015 "Declaret target link address is set.");
12016 if (
Config.isTargetDevice())
12018 if (!CE->getAddress()) {
12025 if (!CE->getAddress()) {
12038 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
12042 OMPTargetGlobalVarEntryIndirectVTable))
12051 Flags, CE->getLinkage(), CE->getVarName());
12054 Flags, CE->getLinkage());
12065 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
12071 Config.getRequiresFlags());
12081 OS <<
"_" <<
Count;
12086 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
12089 EntryInfo.
Line, NewCount);
12097 auto FileIDInfo = CallBack();
12101 FileID =
Status->getUniqueID().getFile();
12105 FileID =
hash_value(std::get<0>(FileIDInfo));
12109 std::get<1>(FileIDInfo));
12115 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12117 !(Remain & 1); Remain = Remain >> 1)
12135 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12137 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12144 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12150 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
12151 Flags |= MemberOfFlag;
12157 bool IsDeclaration,
bool IsExternallyVisible,
12159 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
12160 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
12161 std::function<
Constant *()> GlobalInitializer,
12172 Config.hasRequiresUnifiedSharedMemory())) {
12177 if (!IsExternallyVisible)
12179 OS <<
"_decl_tgt_ref_ptr";
12182 Value *Ptr =
M.getNamedValue(PtrName);
12191 if (!
Config.isTargetDevice()) {
12192 if (GlobalInitializer)
12193 GV->setInitializer(GlobalInitializer());
12199 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
12200 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
12201 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
12213 bool IsDeclaration,
bool IsExternallyVisible,
12215 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
12216 std::vector<Triple> TargetTriple,
12217 std::function<
Constant *()> GlobalInitializer,
12221 (TargetTriple.empty() && !
Config.isTargetDevice()))
12232 !
Config.hasRequiresUnifiedSharedMemory()) {
12234 VarName = MangledName;
12237 if (!IsDeclaration)
12239 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
12242 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
12246 if (
Config.isTargetDevice() &&
12255 if (!
M.getNamedValue(RefName)) {
12259 GvAddrRef->setConstant(
true);
12261 GvAddrRef->setInitializer(Addr);
12262 GeneratedRefs.push_back(GvAddrRef);
12271 if (
Config.isTargetDevice()) {
12272 VarName = (Addr) ? Addr->
getName() :
"";
12276 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
12277 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
12278 LlvmPtrTy, GlobalInitializer, VariableLinkage);
12279 VarName = (Addr) ? Addr->
getName() :
"";
12281 VarSize =
M.getDataLayout().getPointerSize();
12300 auto &&GetMDInt = [MN](
unsigned Idx) {
12305 auto &&GetMDString = [MN](
unsigned Idx) {
12307 return V->getString();
12310 switch (GetMDInt(0)) {
12314 case OffloadEntriesInfoManager::OffloadEntryInfo::
12315 OffloadingEntryInfoTargetRegion: {
12325 case OffloadEntriesInfoManager::OffloadEntryInfo::
12326 OffloadingEntryInfoDeviceGlobalVar:
12339 if (HostFilePath.
empty())
12343 if (std::error_code Err = Buf.getError()) {
12345 "OpenMPIRBuilder: " +
12353 if (std::error_code Err =
M.getError()) {
12355 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
12369 "expected a valid insertion block for creating an iterator loop");
12379 Builder.getCurrentDebugLocation(),
"omp.it.cont");
12391 T->eraseFromParent();
12400 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
12402 "iterator bodygen must terminate the canonical body with an "
12403 "unconditional branch to the loop latch",
12427 for (
const auto &
ParamAttr : ParamAttrs) {
12470 return std::string(Out.
str());
12478 unsigned VecRegSize;
12480 ISADataTy ISAData[] = {
12499 for (
char Mask :
Masked) {
12500 for (
const ISADataTy &
Data : ISAData) {
12503 Out <<
"_ZGV" <<
Data.ISA << Mask;
12505 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
12519template <
typename T>
12522 StringRef MangledName,
bool OutputBecomesInput,
12526 Out << Prefix << ISA << LMask << VLEN;
12527 if (OutputBecomesInput)
12529 Out << ParSeq <<
'_' << MangledName;
12538 bool OutputBecomesInput,
12543 OutputBecomesInput, Fn);
12545 OutputBecomesInput, Fn);
12549 OutputBecomesInput, Fn);
12551 OutputBecomesInput, Fn);
12555 OutputBecomesInput, Fn);
12557 OutputBecomesInput, Fn);
12562 OutputBecomesInput, Fn);
12573 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
12574 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
12586 OutputBecomesInput, Fn);
12593 OutputBecomesInput, Fn);
12595 OutputBecomesInput, Fn);
12599 OutputBecomesInput, Fn);
12603 OutputBecomesInput, Fn);
12612 OutputBecomesInput, Fn);
12619 MangledName, OutputBecomesInput, Fn);
12621 MangledName, OutputBecomesInput, Fn);
12625 MangledName, OutputBecomesInput, Fn);
12629 MangledName, OutputBecomesInput, Fn);
12639 return OffloadEntriesTargetRegion.empty() &&
12640 OffloadEntriesDeviceGlobalVar.empty();
12643unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
12645 auto It = OffloadEntriesTargetRegionCount.find(
12646 getTargetRegionEntryCountKey(EntryInfo));
12647 if (It == OffloadEntriesTargetRegionCount.end())
12652void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
12654 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
12655 EntryInfo.
Count + 1;
12661 OffloadEntriesTargetRegion[EntryInfo] =
12664 ++OffloadingEntriesNum;
12670 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
12673 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12677 if (OMPBuilder->Config.isTargetDevice()) {
12682 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
12683 Entry.setAddress(Addr);
12685 Entry.setFlags(Flags);
12691 "Target region entry already registered!");
12693 OffloadEntriesTargetRegion[EntryInfo] = Entry;
12694 ++OffloadingEntriesNum;
12696 incrementTargetRegionEntryInfoCount(EntryInfo);
12703 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12705 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
12706 if (It == OffloadEntriesTargetRegion.end()) {
12710 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
12718 for (
const auto &It : OffloadEntriesTargetRegion) {
12719 Action(It.first, It.second);
12725 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
12726 ++OffloadingEntriesNum;
12732 if (OMPBuilder->Config.isTargetDevice()) {
12736 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12738 if (Entry.getVarSize() == 0) {
12739 Entry.setVarSize(VarSize);
12740 Entry.setLinkage(Linkage);
12744 Entry.setVarSize(VarSize);
12745 Entry.setLinkage(Linkage);
12746 Entry.setAddress(Addr);
12749 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12750 assert(Entry.isValid() && Entry.getFlags() == Flags &&
12751 "Entry not initialized!");
12752 if (Entry.getVarSize() == 0) {
12753 Entry.setVarSize(VarSize);
12754 Entry.setLinkage(Linkage);
12761 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
12762 Addr, VarSize, Flags, Linkage,
12765 OffloadEntriesDeviceGlobalVar.try_emplace(
12766 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
12767 ++OffloadingEntriesNum;
12774 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12775 Action(E.getKey(), E.getValue());
12782void CanonicalLoopInfo::collectControlBlocks(
12789 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12801void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12813void CanonicalLoopInfo::mapIndVar(
12823 for (
Use &U : OldIV->
uses()) {
12827 if (
User->getParent() == getCond())
12829 if (
User->getParent() == getLatch())
12835 Value *NewIV = Updater(OldIV);
12838 for (Use *U : ReplacableUses)
12859 "Preheader must terminate with unconditional branch");
12861 "Preheader must jump to header");
12865 "Header must terminate with unconditional branch");
12866 assert(Header->getSingleSuccessor() == Cond &&
12867 "Header must jump to exiting block");
12870 assert(Cond->getSinglePredecessor() == Header &&
12871 "Exiting block only reachable from header");
12874 "Exiting block must terminate with conditional branch");
12876 "Exiting block's first successor jump to the body");
12878 "Exiting block's second successor must exit the loop");
12882 "Body only reachable from exiting block");
12887 "Latch must terminate with unconditional branch");
12888 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12891 assert(Latch->getSinglePredecessor() !=
nullptr);
12896 "Exit block must terminate with unconditional branch");
12897 assert(Exit->getSingleSuccessor() == After &&
12898 "Exit block must jump to after block");
12902 "After block only reachable from exit block");
12906 assert(IndVar &&
"Canonical induction variable not found?");
12908 "Induction variable must be an integer");
12910 "Induction variable must be a PHI in the loop header");
12916 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12924 assert(TripCount &&
"Loop trip count not found?");
12926 "Trip count and induction variable must have the same type");
12930 "Exit condition must be a signed less-than comparison");
12932 "Exit condition must compare the induction variable");
12934 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static Function * createTargetParallelWrapper(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn)
Create wrapper function used to gather the outlined function's argument structure from a shared buffe...
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop, depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static bool isGenericKernel(Function &Fn)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static std::optional< omp::OMPTgtExecModeFlags > getTargetKernelExecMode(Function &Kernel)
Given a function, if it represents the entry point of a target kernel, this returns the execution mod...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static cl::opt< bool > UseDefaultMaxThreads("openmp-ir-builder-use-default-max-threads", cl::Hidden, cl::desc("Use a default max threads if none is provided."), cl::init(true))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const OpenMPIRBuilder::DependenciesInfo &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is a wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static bool hasGridValue(const Triple &T)
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static AtomicOrdering TransformReleaseAcquireRelease(AtomicOrdering AO)
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SmallPtrSet< BasicBlock *, 0 > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
void setWeak(bool IsWeak)
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
bool hasTerminator() const LLVM_READONLY
Returns whether the block has a terminator.
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or an Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions. NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a declare target to.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
LLVM_ABI CallInst * createOMPAllocShared(const LocationDescription &Loc, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_alloc_shared.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI void initialize()
Initialize the internal state, this will put structure types and potentially other helpers into the ...
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly, bool IsWeak=false)
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
LLVM_ABI InsertPointOrErrorTy createScope(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait)
Generator for 'omp scope'.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const DependenciesInfo &Dependencies={}, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> TargetBodyGenCallbackTy
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
bool HandleFPNegZero
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB, bool PreserveMemberOfFlags=false)
Emit the user-defined mapper function.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, const DependenciesInfo &Dependencies={}, const AffinityData &Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={}, ArrayRef< BasicBlock * > DeallocBlocks={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
void addOutlineInfo(std::unique_ptr< OutlineInfo > &&OI)
Add a new region that will be outlined later.
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, bool IsSPMD=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number.
LLVM_ABI void emitTaskDependency(IRBuilderBase &Builder, Value *Entry, const DependData &Dep)
Store one kmp_depend_info entry at the given Entry pointer.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
LLVM_ABI CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI CallInst * createOMPAlignedAlloc(const LocationDescription &Loc, Value *Align, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_align_alloc.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive splits and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPFreeShared(const LocationDescription &Loc, Value *Addr, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_free_shared.
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if variable with the given name already exists) or creates internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
SmallVector< std::unique_ptr< OutlineInfo >, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0)
Create a runtime call for kmpc_target_deinit.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< InsertPointTy > DeallocIPs)> TargetGenArgAccessorsCallbackTy
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const DependenciesInfo &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Represent a constant reference to a string, i.e.
std::string str() const
Get the contents as an std::string.
constexpr bool empty() const
Check if the string is empty.
constexpr size_t size() const
Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE=nullptr, const Loop *L=nullptr) const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return the user if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "New" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr)
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
@ OMP_TGT_EXEC_MODE_SPMD_NO_LOOP
@ OMP_TGT_EXEC_MODE_GENERIC
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
LLVM_ABI TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
auto filter_to_vector(ContainerTy &&C, PredicateFn &&Pred)
Filter a range to a SmallVector with the element types deduced.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
omp::RTLDependenceKindTy DepKind
A struct to pack static and dynamic dependency information for a task.
SmallVector< DependData > Deps
LLVM_ABI Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
LLVM_ABI Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
BasicBlock * OuterAllocBB
virtual std::unique_ptr< CodeExtractor > createCodeExtractor(ArrayRef< BasicBlock * > Blocks, bool ArgsInZeroAddressSpace, Twine Suffix=Twine(""))
Create a CodeExtractor instance based on the information stored in this structure,...
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool StrictBlocksAndThreads
True if the kernel strictly requires the number of blocks and threads above to run.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static LLVM_ABI const Target * lookupTarget(const Triple &TheTriple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...