65#define DEBUG_TYPE "openmp-ir-builder"
72 cl::desc(
"Use optimistic attributes describing "
73 "'as-if' properties of runtime calls."),
77 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
78 cl::desc(
"Factor for the unroll threshold to account for code "
79 "simplifications still taking place"),
90 if (!IP1.isSet() || !IP2.isSet())
92 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
97 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
98 case OMPScheduleType::UnorderedStaticChunked:
99 case OMPScheduleType::UnorderedStatic:
100 case OMPScheduleType::UnorderedDynamicChunked:
101 case OMPScheduleType::UnorderedGuidedChunked:
102 case OMPScheduleType::UnorderedRuntime:
103 case OMPScheduleType::UnorderedAuto:
104 case OMPScheduleType::UnorderedTrapezoidal:
105 case OMPScheduleType::UnorderedGreedy:
106 case OMPScheduleType::UnorderedBalanced:
107 case OMPScheduleType::UnorderedGuidedIterativeChunked:
108 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
109 case OMPScheduleType::UnorderedSteal:
110 case OMPScheduleType::UnorderedStaticBalancedChunked:
111 case OMPScheduleType::UnorderedGuidedSimd:
112 case OMPScheduleType::UnorderedRuntimeSimd:
113 case OMPScheduleType::OrderedStaticChunked:
114 case OMPScheduleType::OrderedStatic:
115 case OMPScheduleType::OrderedDynamicChunked:
116 case OMPScheduleType::OrderedGuidedChunked:
117 case OMPScheduleType::OrderedRuntime:
118 case OMPScheduleType::OrderedAuto:
119 case OMPScheduleType::OrderdTrapezoidal:
120 case OMPScheduleType::NomergeUnorderedStaticChunked:
121 case OMPScheduleType::NomergeUnorderedStatic:
122 case OMPScheduleType::NomergeUnorderedDynamicChunked:
123 case OMPScheduleType::NomergeUnorderedGuidedChunked:
124 case OMPScheduleType::NomergeUnorderedRuntime:
125 case OMPScheduleType::NomergeUnorderedAuto:
126 case OMPScheduleType::NomergeUnorderedTrapezoidal:
127 case OMPScheduleType::NomergeUnorderedGreedy:
128 case OMPScheduleType::NomergeUnorderedBalanced:
129 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
130 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
131 case OMPScheduleType::NomergeUnorderedSteal:
132 case OMPScheduleType::NomergeOrderedStaticChunked:
133 case OMPScheduleType::NomergeOrderedStatic:
134 case OMPScheduleType::NomergeOrderedDynamicChunked:
135 case OMPScheduleType::NomergeOrderedGuidedChunked:
136 case OMPScheduleType::NomergeOrderedRuntime:
137 case OMPScheduleType::NomergeOrderedAuto:
138 case OMPScheduleType::NomergeOrderedTrapezoidal:
146 SchedType & OMPScheduleType::MonotonicityMask;
147 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
159 Builder.restoreIP(IP);
169 Kernel->getFnAttribute(
"target-features").getValueAsString();
170 if (Features.
count(
"+wavefrontsize64"))
185 bool HasSimdModifier) {
187 switch (ClauseKind) {
188 case OMP_SCHEDULE_Default:
189 case OMP_SCHEDULE_Static:
190 return HasChunks ? OMPScheduleType::BaseStaticChunked
191 : OMPScheduleType::BaseStatic;
192 case OMP_SCHEDULE_Dynamic:
193 return OMPScheduleType::BaseDynamicChunked;
194 case OMP_SCHEDULE_Guided:
195 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
196 : OMPScheduleType::BaseGuidedChunked;
197 case OMP_SCHEDULE_Auto:
199 case OMP_SCHEDULE_Runtime:
200 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
201 : OMPScheduleType::BaseRuntime;
209 bool HasOrderedClause) {
210 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
211 OMPScheduleType::None &&
212 "Must not have ordering nor monotonicity flags already set");
215 ? OMPScheduleType::ModifierOrdered
216 : OMPScheduleType::ModifierUnordered;
217 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
220 if (OrderingScheduleType ==
221 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
222 return OMPScheduleType::OrderedGuidedChunked;
223 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
224 OMPScheduleType::ModifierOrdered))
225 return OMPScheduleType::OrderedRuntime;
227 return OrderingScheduleType;
233 bool HasSimdModifier,
bool HasMonotonic,
234 bool HasNonmonotonic,
bool HasOrderedClause) {
235 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
236 OMPScheduleType::None &&
237 "Must not have monotonicity flags already set");
238 assert((!HasMonotonic || !HasNonmonotonic) &&
239 "Monotonic and Nonmonotonic are contradicting each other");
242 return ScheduleType | OMPScheduleType::ModifierMonotonic;
243 }
else if (HasNonmonotonic) {
244 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
254 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
255 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
261 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
269 bool HasSimdModifier,
bool HasMonotonicModifier,
270 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
276 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
277 HasNonmonotonicModifier, HasOrderedClause);
292 assert(!Br->isConditional() &&
293 "BB's terminator must be an unconditional branch (or degenerate)");
296 Br->setSuccessor(0,
Target);
301 NewBr->setDebugLoc(
DL);
307 "Target BB must not have PHI nodes");
327 NewBr->setDebugLoc(
DL);
335 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
339 Builder.SetInsertPoint(Old);
343 Builder.SetCurrentDebugLocation(
DebugLoc);
352 spliceBB(IP, New, CreateBranch,
DL);
353 New->replaceSuccessorsPhiUsesWith(Old, New);
362 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
364 Builder.SetInsertPoint(Builder.GetInsertBlock());
367 Builder.SetCurrentDebugLocation(
DebugLoc);
376 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
378 Builder.SetInsertPoint(Builder.GetInsertBlock());
381 Builder.SetCurrentDebugLocation(
DebugLoc);
388 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
395 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
397 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
398 const Twine &Name =
"",
bool AsPtr =
true) {
399 Builder.restoreIP(OuterAllocaIP);
402 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
406 FakeVal = FakeValAddr;
409 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
414 Builder.restoreIP(InnerAllocaIP);
418 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
434enum OpenMPOffloadingRequiresDirFlags {
436 OMP_REQ_UNDEFINED = 0x000,
438 OMP_REQ_NONE = 0x001,
440 OMP_REQ_REVERSE_OFFLOAD = 0x002,
442 OMP_REQ_UNIFIED_ADDRESS = 0x004,
444 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
446 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
452OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
453 : RequiresFlags(OMP_REQ_UNDEFINED) {}
455OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
456 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
457 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
458 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
459 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
460 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
461 RequiresFlags(OMP_REQ_UNDEFINED) {
462 if (HasRequiresReverseOffload)
463 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
464 if (HasRequiresUnifiedAddress)
465 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
466 if (HasRequiresUnifiedSharedMemory)
467 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
468 if (HasRequiresDynamicAllocators)
469 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
472bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
473 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
476bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
477 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
480bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
481 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
484bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
485 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
488int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
489 return hasRequiresFlags() ? RequiresFlags
490 :
static_cast<int64_t
>(OMP_REQ_NONE);
493void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
495 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
497 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
500void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
502 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
504 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
514void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
516 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
518 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
525void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
529 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
531 constexpr size_t MaxDim = 3;
533 Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
535 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
538 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
539 Value *NumThreads3D =
540 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
542 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
544 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
546 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
548 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
550 ArgsVector = {Version,
552 KernelArgs.RTArgs.BasePointersArray,
553 KernelArgs.RTArgs.PointersArray,
554 KernelArgs.RTArgs.SizesArray,
555 KernelArgs.RTArgs.MapTypesArray,
556 KernelArgs.RTArgs.MapNamesArray,
557 KernelArgs.RTArgs.MappersArray,
558 KernelArgs.NumIterations,
562 KernelArgs.DynCGGroupMem};
570 auto FnAttrs =
Attrs.getFnAttrs();
571 auto RetAttrs =
Attrs.getRetAttrs();
573 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
578 bool Param =
true) ->
void {
579 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
580 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
581 if (HasSignExt || HasZeroExt) {
582 assert(AS.getNumAttributes() == 1 &&
583 "Currently not handling extension attr combined with others.");
585 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
588 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
595#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
596#include "llvm/Frontend/OpenMP/OMPKinds.def"
600#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
602 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
603 addAttrSet(RetAttrs, RetAttrSet, false); \
604 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
605 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
606 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
608#include "llvm/Frontend/OpenMP/OMPKinds.def"
622#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
624 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
626 Fn = M.getFunction(Str); \
628#include "llvm/Frontend/OpenMP/OMPKinds.def"
634#define OMP_RTL(Enum, Str, ...) \
636 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
638#include "llvm/Frontend/OpenMP/OMPKinds.def"
642 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
652 LLVMContext::MD_callback,
654 2, {-1, -1},
true)}));
660 addAttributes(FnID, *Fn);
667 assert(Fn &&
"Failed to create OpenMP runtime function");
675 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
679void OpenMPIRBuilder::initialize() { initializeTypes(M); }
690 for (
auto Inst =
Block->getReverseIterator()->begin();
691 Inst !=
Block->getReverseIterator()->end();) {
704void OpenMPIRBuilder::finalize(
Function *Fn) {
708 for (OutlineInfo &OI : OutlineInfos) {
711 if (Fn && OI.getFunction() != Fn) {
716 ParallelRegionBlockSet.
clear();
718 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
728 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
737 ".omp_par", ArgsInZeroAddressSpace);
741 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
742 assert(Extractor.isEligible() &&
743 "Expected OpenMP outlining to be possible!");
745 for (
auto *V : OI.ExcludeArgsFromAggregate)
746 Extractor.excludeArgFromAggregate(V);
748 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
752 if (TargetCpuAttr.isStringAttribute())
755 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
756 if (TargetFeaturesAttr.isStringAttribute())
757 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
760 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
762 "OpenMP outlined functions should not return a value!");
767 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
774 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
781 "Expected instructions to add in the outlined region entry");
783 End = ArtificialEntry.
rend();
788 if (
I.isTerminator()) {
790 if (OI.EntryBB->getTerminator())
791 OI.EntryBB->getTerminator()->adoptDbgRecords(
792 &ArtificialEntry,
I.getIterator(),
false);
796 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
799 OI.EntryBB->moveBefore(&ArtificialEntry);
806 if (OI.PostOutlineCB)
807 OI.PostOutlineCB(*OutlinedFn);
811 OutlineInfos = std::move(DeferredOutlines);
832 for (
Function *
F : ConstantAllocaRaiseCandidates)
835 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
836 [](EmitMetadataErrorKind Kind,
837 const TargetRegionEntryInfo &EntryInfo) ->
void {
838 errs() <<
"Error of kind: " << Kind
839 <<
" when emitting offload entries and metadata during "
840 "OMPIRBuilder finalization \n";
843 if (!OffloadInfoManager.empty())
844 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
846 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
847 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
848 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
849 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
855bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
857OpenMPIRBuilder::~OpenMPIRBuilder() {
858 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
866 ConstantInt::get(I32Ty,
Value), Name);
878 UsedArray.
resize(List.size());
879 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
883 if (UsedArray.
empty())
890 GV->setSection(
"llvm.metadata");
894OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
896 auto *Int8Ty = Builder.getInt8Ty();
899 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
907 unsigned Reserve2Flags) {
909 LocFlags |= OMP_IDENT_FLAG_KMPC;
912 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
917 ConstantInt::get(
Int32, Reserve2Flags),
918 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
920 size_t SrcLocStrArgIdx = 4;
921 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
925 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
932 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
933 if (
GV.getInitializer() == Initializer)
938 M, OpenMPIRBuilder::Ident,
941 M.getDataLayout().getDefaultGlobalsAddressSpace());
953 SrcLocStrSize = LocStr.
size();
954 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
962 if (
GV.isConstant() &&
GV.hasInitializer() &&
963 GV.getInitializer() == Initializer)
966 SrcLocStr = Builder.CreateGlobalString(
967 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
975 unsigned Line,
unsigned Column,
981 Buffer.
append(FunctionName);
983 Buffer.
append(std::to_string(Line));
985 Buffer.
append(std::to_string(Column));
988 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
992OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
993 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
994 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1002 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1004 if (
DIFile *DIF = DIL->getFile())
1005 if (std::optional<StringRef> Source = DIF->getSource())
1010 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1011 DIL->getColumn(), SrcLocStrSize);
1014Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1016 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1017 Loc.IP.getBlock()->getParent());
1020Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1021 return Builder.CreateCall(
1022 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1023 "omp_global_thread_num");
1026OpenMPIRBuilder::InsertPointOrErrorTy
1027OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1028 bool ForceSimpleCall,
bool CheckCancelFlag) {
1029 if (!updateToLocation(
Loc))
1038 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1041 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1044 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1047 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1050 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1055 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1057 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1058 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1063 bool UseCancelBarrier =
1064 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1067 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
1068 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1069 : OMPRTL___kmpc_barrier),
1072 if (UseCancelBarrier && CheckCancelFlag)
1073 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1076 return Builder.saveIP();
1079OpenMPIRBuilder::InsertPointOrErrorTy
1080OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1082 omp::Directive CanceledDirective) {
1083 if (!updateToLocation(
Loc))
1087 auto *UI = Builder.CreateUnreachable();
1092 Builder.SetInsertPoint(ThenTI);
1094 Value *CancelKind =
nullptr;
1095 switch (CanceledDirective) {
1096#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1097 case DirectiveEnum: \
1098 CancelKind = Builder.getInt32(Value); \
1100#include "llvm/Frontend/OpenMP/OMPKinds.def"
1106 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1107 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1108 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1110 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1111 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1112 if (CanceledDirective == OMPD_parallel) {
1114 Builder.restoreIP(IP);
1115 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1116 omp::Directive::OMPD_unknown,
1125 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1129 Builder.SetInsertPoint(UI->getParent());
1130 UI->eraseFromParent();
1132 return Builder.saveIP();
1135OpenMPIRBuilder::InsertPointOrErrorTy
1136OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1137 omp::Directive CanceledDirective) {
1138 if (!updateToLocation(
Loc))
1142 auto *UI = Builder.CreateUnreachable();
1143 Builder.SetInsertPoint(UI);
1145 Value *CancelKind =
nullptr;
1146 switch (CanceledDirective) {
1147#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1148 case DirectiveEnum: \
1149 CancelKind = Builder.getInt32(Value); \
1151#include "llvm/Frontend/OpenMP/OMPKinds.def"
1157 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1158 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1159 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1161 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1162 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1163 if (CanceledDirective == OMPD_parallel) {
1165 Builder.restoreIP(IP);
1166 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1167 omp::Directive::OMPD_unknown,
1176 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1180 Builder.SetInsertPoint(UI->getParent());
1181 UI->eraseFromParent();
1183 return Builder.saveIP();
1186OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1187 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1190 if (!updateToLocation(
Loc))
1193 Builder.restoreIP(AllocaIP);
1194 auto *KernelArgsPtr =
1195 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1196 updateToLocation(
Loc);
1200 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1201 Builder.CreateAlignedStore(
1203 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1207 NumThreads, HostPtr, KernelArgsPtr};
1209 Return = Builder.CreateCall(
1210 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1213 return Builder.saveIP();
1216OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1217 const LocationDescription &
Loc,
Value *OutlinedFnID,
1218 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1219 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1221 if (!updateToLocation(
Loc))
1234 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1238 Value *Return =
nullptr;
1242 getKernelArgsVector(Args, Builder, ArgsVector);
1257 Builder.restoreIP(emitTargetKernel(
1258 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1259 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1266 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1268 auto CurFn = Builder.GetInsertBlock()->getParent();
1269 emitBlock(OffloadFailedBlock, CurFn);
1270 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1272 return AfterIP.takeError();
1273 Builder.restoreIP(*AfterIP);
1274 emitBranch(OffloadContBlock);
1275 emitBlock(OffloadContBlock, CurFn,
true);
1276 return Builder.saveIP();
1279Error OpenMPIRBuilder::emitCancelationCheckImpl(
1280 Value *CancelFlag, omp::Directive CanceledDirective,
1281 FinalizeCallbackTy ExitCB) {
1282 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1283 "Unexpected cancellation!");
1288 if (Builder.GetInsertPoint() == BB->
end()) {
1294 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1296 Builder.SetInsertPoint(BB);
1302 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1303 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1308 Builder.SetInsertPoint(CancellationBlock);
1310 if (
Error Err = ExitCB(Builder.saveIP()))
1312 auto &FI = FinalizationStack.back();
1313 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1317 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1336 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1339 "Expected at least tid and bounded tid as arguments");
1340 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1343 assert(CI &&
"Expected call instruction to outlined function");
1344 CI->
getParent()->setName(
"omp_parallel");
1346 Builder.SetInsertPoint(CI);
1347 Type *PtrTy = OMPIRBuilder->VoidPtr;
1351 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1355 Value *Args = ArgsAlloca;
1359 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1360 Builder.restoreIP(CurrentIP);
1363 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1365 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1367 Builder.CreateStore(V, StoreAddress);
1371 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1372 : Builder.getInt32(1);
1375 Value *Parallel51CallArgs[] = {
1379 NumThreads ? NumThreads : Builder.getInt32(-1),
1380 Builder.getInt32(-1),
1384 Builder.getInt64(NumCapturedVars)};
1387 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1389 Builder.CreateCall(RTLFn, Parallel51CallArgs);
1392 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1395 Builder.SetInsertPoint(PrivTID);
1397 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1404 I->eraseFromParent();
1421 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1424 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1427 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1435 F->addMetadata(LLVMContext::MD_callback,
1444 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1447 "Expected at least tid and bounded tid as arguments");
1448 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1451 CI->
getParent()->setName(
"omp_parallel");
1452 Builder.SetInsertPoint(CI);
1455 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1459 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1461 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1468 auto PtrTy = OMPIRBuilder->VoidPtr;
1469 if (IfCondition && NumCapturedVars == 0) {
1474 Builder.CreateCall(RTLFn, RealArgs);
1477 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1480 Builder.SetInsertPoint(PrivTID);
1482 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1489 I->eraseFromParent();
1493OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1494 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1495 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1496 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1497 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1500 if (!updateToLocation(
Loc))
1504 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1505 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1506 Value *ThreadID = getOrCreateThreadID(Ident);
1512 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1516 if (NumThreads && !Config.isTargetDevice()) {
1519 Builder.CreateIntCast(NumThreads,
Int32,
false)};
1521 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1524 if (ProcBind != OMP_PROC_BIND_default) {
1528 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1530 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1533 BasicBlock *InsertBB = Builder.GetInsertBlock();
1538 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1546 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1547 Builder.restoreIP(NewOuter);
1548 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr");
1550 Builder.CreateAlloca(
Int32,
nullptr,
"zero.addr");
1553 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1556 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1560 PointerType ::get(M.getContext(), 0),
1561 "zero.addr.ascast");
1582 auto FiniCBWrapper = [&](InsertPointTy IP) {
1587 Builder.restoreIP(IP);
1589 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1593 "Unexpected insertion point for finalization call!");
1597 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1602 InsertPointTy InnerAllocaIP = Builder.saveIP();
1605 Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr.local");
1609 ToBeDeleted.
push_back(Builder.CreateLoad(
Int32, TIDAddr,
"tid.addr.use"));
1611 Builder.CreateLoad(
Int32, ZeroAddr,
"zero.addr.use");
1629 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1632 assert(BodyGenCB &&
"Expected body generation callback!");
1633 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1634 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1637 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1640 if (Config.isTargetDevice()) {
1642 OI.PostOutlineCB = [=, ToBeDeletedVec =
1643 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1645 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1646 ThreadID, ToBeDeletedVec);
1650 OI.PostOutlineCB = [=, ToBeDeletedVec =
1651 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1653 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1657 OI.OuterAllocaBB = OuterAllocaBlock;
1658 OI.EntryBB = PRegEntryBB;
1659 OI.ExitBB = PRegExitBB;
1663 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1674 ".omp_par", ArgsInZeroAddressSpace);
1679 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1681 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1686 return GV->getValueType() == OpenMPIRBuilder::Ident;
1691 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1694 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1697 if (&V == TIDAddr || &V == ZeroAddr) {
1698 OI.ExcludeArgsFromAggregate.push_back(&V);
1703 for (
Use &U : V.uses())
1705 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1715 if (!V.getType()->isPointerTy()) {
1719 Builder.restoreIP(OuterAllocaIP);
1721 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1725 Builder.SetInsertPoint(InsertBB,
1727 Builder.CreateStore(&V,
Ptr);
1730 Builder.restoreIP(InnerAllocaIP);
1731 Inner = Builder.CreateLoad(V.getType(),
Ptr);
1734 Value *ReplacementValue =
nullptr;
1737 ReplacementValue = PrivTID;
1739 InsertPointOrErrorTy AfterIP =
1740 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1742 return AfterIP.takeError();
1743 Builder.restoreIP(*AfterIP);
1745 InnerAllocaIP.getBlock(),
1746 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1748 assert(ReplacementValue &&
1749 "Expected copy/create callback to set replacement value!");
1750 if (ReplacementValue == &V)
1755 UPtr->set(ReplacementValue);
1780 for (
Value *Output : Outputs)
1783 assert(Outputs.empty() &&
1784 "OpenMP outlining should not produce live-out values!");
1786 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1788 for (
auto *BB : Blocks)
1795 auto FiniInfo = FinalizationStack.pop_back_val();
1797 assert(FiniInfo.DK == OMPD_parallel &&
1798 "Unexpected finalization stack state!");
1802 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1803 if (
Error Err = FiniCB(PreFiniIP))
1807 addOutlineInfo(std::move(OI));
1809 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1810 UI->eraseFromParent();
1815void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1818 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1819 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1821 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1824void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1825 if (!updateToLocation(
Loc))
1830void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1834 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1835 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1836 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1839 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1843void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1844 if (!updateToLocation(
Loc))
1846 emitTaskwaitImpl(
Loc);
1849void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1852 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1853 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1855 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1857 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1861void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1862 if (!updateToLocation(
Loc))
1864 emitTaskyieldImpl(
Loc);
// NOTE(review): garbled extraction of a static helper (presumably the one
// that materializes a kmp_depend_info array for task dependencies — TODO
// confirm against the upstream file). Its signature line and several
// statements are missing, and original-file line numbers are fused into the
// text; recover the span from the upstream source before editing further.
1873 OpenMPIRBuilder &OMPBuilder,
// Early-out when there are no dependencies (continuation lines missing).
1876 if (Dependencies.
empty())
1896 Type *DependInfo = OMPBuilder.DependInfo;
1897 Module &M = OMPBuilder.M;
1899 Value *DepArray =
nullptr;
// The dependency array is alloca'd in the function entry block, then the
// insertion point is restored.
1900 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1901 Builder.SetInsertPoint(
1902 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1905 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1907 Builder.restoreIP(OldIP);
// Populate one kmp_depend_info entry (BaseAddr, Len, Flags) per dependency.
1909 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1911 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1913 Value *Addr = Builder.CreateStructGEP(
1915 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1916 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1917 Builder.CreateStore(DepValPtr, Addr);
1920 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1921 Builder.CreateStore(
1922 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1925 Value *Flags = Builder.CreateStructGEP(
1927 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1928 Builder.CreateStore(
1929 ConstantInt::get(Builder.getInt8Ty(),
1930 static_cast<unsigned int>(Dep.DepKind)),
// NOTE(review): garbled extraction of OpenMPIRBuilder::createTask — many
// interior statements are elided and original-file line numbers are fused
// into the text. The visible skeleton: split off alloca/body/exit blocks,
// run the body callback, then outline the region and, in PostOutlineCB,
// emit __kmpc_omp_task_alloc plus the task launch (with optional if-clause
// and dependence handling). Recover the full body from upstream before
// editing.
1936OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1937 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1938 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1942 if (!updateToLocation(
Loc))
1943 return InsertPointTy();
1946 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1947 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Carve the region into alloca / body / exit blocks for outlining.
1964 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1965 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1967 splitBB(Builder,
true,
"task.alloca");
1969 InsertPointTy TaskAllocaIP =
1970 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1971 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1972 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1976 OI.EntryBB = TaskAllocaBB;
1977 OI.OuterAllocaBB = AllocaIP.getBlock();
1978 OI.ExitBB = TaskExitBB;
1983 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
// Post-outlining: replace the stale call with the runtime task machinery.
1985 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1986 Mergeable, Priority, EventHandle, TaskAllocaBB,
1987 ToBeDeleted](
Function &OutlinedFn)
mutable {
1990 "there must be a single user for the outlined function");
1995 bool HasShareds = StaleCI->
arg_size() > 1;
1996 Builder.SetInsertPoint(StaleCI);
2001 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2005 Value *ThreadID = getOrCreateThreadID(Ident);
// Flags word: bit 0 = tied, bit 1 = final, bit 2 = mergeable, bit 5 =
// priority — assembled below with selects/ors.
2017 Value *Flags = Builder.getInt32(Tied);
2020 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2021 Flags = Builder.CreateOr(FinalFlag, Flags);
2025 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2027 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2033 Value *TaskSize = Builder.getInt64(
2034 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2039 Value *SharedsSize = Builder.getInt64(0);
2043 assert(ArgStructAlloca &&
2044 "Unable to find the alloca instruction corresponding to arguments "
2045 "for extracted function");
2048 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2049 "arguments for extracted function");
2051 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2056 CallInst *TaskData = Builder.CreateCall(
2057 TaskAllocFn, {Ident, ThreadID, Flags,
2058 TaskSize, SharedsSize,
// Detachable tasks: create a completion event and publish its address.
2065 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2066 OMPRTL___kmpc_task_allow_completion_event);
2068 Builder.CreateCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2070 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2071 Builder.getPtrTy(0));
2072 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2073 Builder.CreateStore(EventVal, EventHandleAddr);
2079 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2080 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2098 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2101 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2102 Value *PriorityData = Builder.CreateInBoundsGEP(
2103 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2106 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2108 Builder.CreateStore(Priority, CmplrData);
// if-clause: undeferred path calls task_begin_if0 / outlined / complete_if0.
2133 splitBB(Builder,
true,
"if.end");
2135 Builder.GetInsertPoint()->
getParent()->getTerminator();
2136 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2137 Builder.SetInsertPoint(IfTerminator);
2140 Builder.SetInsertPoint(ElseTI);
2142 if (Dependencies.size()) {
2144 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2147 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2148 ConstantInt::get(Builder.getInt32Ty(), 0),
2152 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2154 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2155 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2158 CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
2160 CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
2162 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2163 Builder.SetInsertPoint(ThenTI);
// Deferred path: launch via __kmpc_omp_task(_with_deps).
2166 if (Dependencies.size()) {
2168 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2171 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2172 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2177 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2178 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
2183 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2185 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2187 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2191 I->eraseFromParent();
2194 addOutlineInfo(std::move(OI));
2195 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2197 return Builder.saveIP();
2200OpenMPIRBuilder::InsertPointOrErrorTy
2201OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2202 InsertPointTy AllocaIP,
2203 BodyGenCallbackTy BodyGenCB) {
2204 if (!updateToLocation(
Loc))
2205 return InsertPointTy();
2208 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2209 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2210 Value *ThreadID = getOrCreateThreadID(Ident);
2214 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2215 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2217 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2218 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
2221 Builder.SetInsertPoint(TaskgroupExitBB);
2224 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2225 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2227 return Builder.saveIP();
// NOTE(review): garbled extraction of OpenMPIRBuilder::createSections —
// interior statements are elided and original-file line numbers are fused
// into the text. Visible structure: each section body becomes a switch case
// inside a canonical loop over [0, #sections), which is then lowered with
// applyStaticWorkshareLoop; a finalization callback and cancellation
// branches are wired up afterwards. Recover from upstream before editing.
2230OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2231 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2233 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2236 if (!updateToLocation(
Loc))
2242 auto FiniCBWrapper = [&](InsertPointTy IP) {
2251 CancellationBranches.
push_back(DummyBranch);
2255 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
// Loop body: dispatch on the induction variable to the section callbacks.
2273 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2274 Builder.restoreIP(CodeGenIP);
2276 splitBBWithSuffix(Builder,
false,
".sections.after");
2280 unsigned CaseNumber = 0;
2281 for (
auto SectionCB : SectionCBs) {
2283 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2284 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2285 Builder.SetInsertPoint(CaseBB);
2287 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
// Canonical loop over the section indices, then static workshare lowering.
2299 Value *LB = ConstantInt::get(I32Ty, 0);
2300 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2301 Value *
ST = ConstantInt::get(I32Ty, 1);
2303 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2307 InsertPointOrErrorTy WsloopIP =
2308 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2309 WorksharingLoopType::ForStaticLoop, !IsNowait);
2311 return WsloopIP.takeError();
2312 InsertPointTy AfterIP = *WsloopIP;
2315 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
// Pop and run the finalization callback registered above.
2318 auto FiniInfo = FinalizationStack.pop_back_val();
2319 assert(FiniInfo.DK == OMPD_sections &&
2320 "Unexpected finalization stack state!");
2321 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2322 Builder.restoreIP(AfterIP);
2324 splitBBWithSuffix(Builder,
true,
"sections.fini");
2325 if (
Error Err = CB(Builder.saveIP()))
2327 AfterIP = {FiniBB, FiniBB->
begin()};
2331 for (
BranchInst *DummyBranch : CancellationBranches) {
// NOTE(review): garbled extraction of OpenMPIRBuilder::createSection —
// statements are elided and original-file line numbers are fused into the
// text. Visible structure: wrap the finalization callback so it runs at the
// section's exit, then delegate to EmitOMPInlinedRegion with the
// OMPD_sections directive. Recover from upstream before editing.
2339OpenMPIRBuilder::InsertPointOrErrorTy
2340OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2341 BodyGenCallbackTy BodyGenCB,
2342 FinalizeCallbackTy FiniCB) {
2343 if (!updateToLocation(
Loc))
2346 auto FiniCBWrapper = [&](InsertPointTy IP) {
2357 Builder.restoreIP(IP);
2358 auto *CaseBB =
Loc.IP.getBlock();
2362 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2366 Directive OMPD = Directive::OMPD_sections;
2369 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
// NOTE(review): isolated fragment of a small static helper (presumably one
// returning the insertion point just after an instruction — TODO confirm);
// its signature and surrounding lines are missing from this extraction.
2377 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
2380Value *OpenMPIRBuilder::getGPUThreadID() {
2381 return Builder.CreateCall(
2382 getOrCreateRuntimeFunction(M,
2383 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2387Value *OpenMPIRBuilder::getGPUWarpSize() {
2388 return Builder.CreateCall(
2389 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
2392Value *OpenMPIRBuilder::getNVPTXWarpID() {
2393 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2394 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2397Value *OpenMPIRBuilder::getNVPTXLaneID() {
2398 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2399 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2400 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2401 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
2405Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2408 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2409 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2410 assert(FromSize > 0 &&
"From size must be greater than zero");
2411 assert(ToSize > 0 &&
"To size must be greater than zero");
2412 if (FromType == ToType)
2414 if (FromSize == ToSize)
2415 return Builder.CreateBitCast(From, ToType);
2417 return Builder.CreateIntCast(From, ToType,
true);
2418 InsertPointTy SaveIP = Builder.saveIP();
2419 Builder.restoreIP(AllocaIP);
2420 Value *CastItem = Builder.CreateAlloca(ToType);
2421 Builder.restoreIP(SaveIP);
2423 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2424 CastItem, Builder.getPtrTy(0));
2425 Builder.CreateStore(From, ValCastItem);
2426 return Builder.CreateLoad(ToType, CastItem);
2429Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2433 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2434 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2438 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2440 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2441 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2442 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2443 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2444 Value *WarpSizeCast =
2445 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2446 Value *ShuffleCall =
2447 Builder.CreateCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2448 return castValueToType(AllocaIP, ShuffleCall, CastTy);
// NOTE(review): garbled extraction of OpenMPIRBuilder::shuffleAndStore —
// several declarations (Ptr, IntType, loop exit blocks) and statements are
// missing, and original-file line numbers are fused into the text. Visible
// structure: decompose the element into 8/4/2/1-byte integer chunks and
// shuffle each chunk from the remote lane into the destination, with a
// loop when more than one chunk of a given width remains. Recover from
// upstream before editing.
2451void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2454 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2465 Type *IndexTy = Builder.getIndexTy(
2466 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2467 Value *ElemPtr = DstAddr;
// Try chunk widths 8, 4, 2, 1 bytes in turn.
2469 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2473 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2474 Ptr, Builder.getPtrTy(0),
Ptr->getName() +
".ascast");
2476 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2477 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2478 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
// Multiple chunks of this width: emit a precond/body/exit loop.
2481 if ((
Size / IntSize) > 1) {
2482 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2483 SrcAddrGEP, Builder.getPtrTy());
2488 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2489 emitBlock(PreCondBB, CurFunc);
2491 Builder.CreatePHI(
Ptr->getType(), 2);
2494 Builder.CreatePHI(ElemPtr->
getType(), 2);
2498 Value *PtrDiff = Builder.CreatePtrDiff(
2499 Builder.getInt8Ty(), PtrEnd,
2500 Builder.CreatePointerBitCastOrAddrSpaceCast(
Ptr, Builder.getPtrTy()));
2501 Builder.CreateCondBr(
2502 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2504 emitBlock(ThenBB, CurFunc);
2505 Value *Res = createRuntimeShuffleFunction(
2507 Builder.CreateAlignedLoad(
2508 IntType,
Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2510 Builder.CreateAlignedStore(Res, ElemPtr,
2511 M.getDataLayout().getPrefTypeAlign(ElemType));
2513 Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2514 Value *LocalElemPtr =
2515 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2518 emitBranch(PreCondBB);
2519 emitBlock(ExitBB, CurFunc);
// Single remaining chunk: shuffle once, truncate, store, and advance.
2521 Value *Res = createRuntimeShuffleFunction(
2522 AllocaIP, Builder.CreateLoad(IntType,
Ptr), IntType,
Offset);
2525 Res = Builder.CreateTrunc(Res, ElemType);
2526 Builder.CreateStore(Res, ElemPtr);
2527 Ptr = Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2529 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
// NOTE(review): garbled extraction of OpenMPIRBuilder::emitReductionListCopy
// — parameters, the switch scaffolding, and several statements are elided,
// and original-file line numbers are fused into the text. Visible structure:
// for each reduction element, compute source/destination addresses (with a
// fresh private alloca in the RemoteLaneToThread case), then either shuffle
// the element from the remote lane or copy it by evaluation kind
// (scalar / complex / aggregate). Recover from upstream before editing.
2535void OpenMPIRBuilder::emitReductionListCopy(
2536 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2538 CopyOptionsTy CopyOptions) {
2539 Type *IndexTy = Builder.getIndexTy(
2540 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2541 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2545 for (
auto En :
enumerate(ReductionInfos)) {
2546 const ReductionInfo &RI = En.value();
2547 Value *SrcElementAddr =
nullptr;
2548 Value *DestElementAddr =
nullptr;
2549 Value *DestElementPtrAddr =
nullptr;
2551 bool ShuffleInElement =
false;
2554 bool UpdateDestListPtr =
false;
2557 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2558 ReductionArrayTy, SrcBase,
2559 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2560 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2564 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2565 ReductionArrayTy, DestBase,
2566 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// RemoteLaneToThread: land the shuffled value in a private alloca.
2568 case CopyAction::RemoteLaneToThread: {
2569 InsertPointTy CurIP = Builder.saveIP();
2570 Builder.restoreIP(AllocaIP);
2571 AllocaInst *DestAlloca = Builder.CreateAlloca(RI.ElementType,
nullptr,
2572 ".omp.reduction.element");
2574 M.getDataLayout().getPrefTypeAlign(RI.ElementType));
2575 DestElementAddr = DestAlloca;
2577 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2578 DestElementAddr->
getName() +
".ascast");
2579 Builder.restoreIP(CurIP);
2580 ShuffleInElement =
true;
2581 UpdateDestListPtr =
true;
2584 case CopyAction::ThreadCopy: {
2586 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
// Either shuffle from the remote lane or copy by evaluation kind.
2593 if (ShuffleInElement) {
2594 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2595 RemoteLaneOffset, ReductionArrayTy);
2597 switch (RI.EvaluationKind) {
2598 case EvalKind::Scalar: {
2599 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2601 Builder.CreateStore(Elem, DestElementAddr);
2604 case EvalKind::Complex: {
2605 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2606 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2607 Value *SrcReal = Builder.CreateLoad(
2608 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2609 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2610 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2611 Value *SrcImg = Builder.CreateLoad(
2612 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2614 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2615 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2616 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2617 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2618 Builder.CreateStore(SrcReal, DestRealPtr);
2619 Builder.CreateStore(SrcImg, DestImgPtr);
2622 case EvalKind::Aggregate: {
2623 Value *SizeVal = Builder.getInt64(
2624 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2625 Builder.CreateMemCpy(
2626 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2627 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// Publish the private element's address back into the destination list.
2639 if (UpdateDestListPtr) {
2640 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2641 DestElementAddr, Builder.getPtrTy(),
2642 DestElementAddr->
getName() +
".ascast");
2643 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
// NOTE(review): garbled extraction of the inter-warp copy helper (the
// function name "_omp_reduction_inter_warp_copy_func" below suggests
// emitInterWarpCopyFunction — TODO confirm upstream). Its signature line
// and many statements are missing, and original-file line numbers are
// fused into the text. Visible structure: build a (ptr, i32) helper that
// stages each reduction element through shared transfer storage — warp
// masters write, barrier, then the first NumWarps threads read back —
// chunked in 4/2/1-byte pieces. Recover from upstream before editing.
2650 AttributeList FuncAttrs) {
2651 InsertPointTy SavedIP = Builder.saveIP();
2654 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2658 "_omp_reduction_inter_warp_copy_func", &M);
2663 Builder.SetInsertPoint(EntryBB);
// Lazily create the shared transfer-medium global if absent.
2681 "__openmp_nvptx_data_transfer_temporary_storage";
2682 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2683 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2685 if (!TransferMedium) {
2694 Value *GPUThreadID = getGPUThreadID();
2696 Value *LaneID = getNVPTXLaneID();
2698 Value *WarpID = getNVPTXWarpID();
2700 InsertPointTy AllocaIP =
2701 InsertPointTy(Builder.GetInsertBlock(),
2702 Builder.GetInsertBlock()->getFirstInsertionPt());
2705 Builder.restoreIP(AllocaIP);
2706 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2707 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2709 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2710 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2711 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2712 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2713 NumWarpsAlloca, Builder.getPtrTy(0),
2714 NumWarpsAlloca->
getName() +
".ascast");
2715 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2716 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2718 InsertPointTy CodeGenIP =
2720 Builder.restoreIP(CodeGenIP);
2723 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
// Per reduction element, copy in shrinking chunk sizes (4, 2, 1 bytes).
2725 for (
auto En :
enumerate(ReductionInfos)) {
2730 const ReductionInfo &RI = En.value();
2731 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(RI.ElementType);
2732 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2735 unsigned NumIters = RealTySize / TySize;
2738 Value *Cnt =
nullptr;
2739 Value *CntAddr =
nullptr;
2743 CodeGenIP = Builder.saveIP();
2744 Builder.restoreIP(AllocaIP);
2746 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2748 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2749 CntAddr->
getName() +
".ascast");
2750 Builder.restoreIP(CodeGenIP);
// Iteration loop over the NumIters chunks of this width.
2757 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2758 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2760 Value *
Cmp = Builder.CreateICmpULT(
2761 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2762 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2763 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2767 InsertPointOrErrorTy BarrierIP1 =
2768 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2769 omp::Directive::OMPD_unknown,
2773 return BarrierIP1.takeError();
// Warp master writes its element chunk into the transfer medium.
2779 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2780 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2781 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2784 auto *RedListArrayTy =
2786 Type *IndexTy = Builder.getIndexTy(
2787 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2789 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2790 {ConstantInt::get(IndexTy, 0),
2791 ConstantInt::get(IndexTy, En.index())});
2793 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2795 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2799 Value *MediumPtr = Builder.CreateInBoundsGEP(
2800 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2803 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2805 Builder.CreateStore(Elem, MediumPtr,
2807 Builder.CreateBr(MergeBB);
2810 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2811 Builder.CreateBr(MergeBB);
2814 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2815 InsertPointOrErrorTy BarrierIP2 =
2816 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2817 omp::Directive::OMPD_unknown,
2821 return BarrierIP2.takeError();
// The first NumWarps threads read the staged chunks back out.
2828 Value *NumWarpsVal =
2829 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2831 Value *IsActiveThread =
2832 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2833 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2835 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2839 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2840 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2842 Value *TargetElemPtrPtr =
2843 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2844 {ConstantInt::get(IndexTy, 0),
2845 ConstantInt::get(IndexTy, En.index())});
2846 Value *TargetElemPtrVal =
2847 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2848 Value *TargetElemPtr = TargetElemPtrVal;
2851 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2854 Value *SrcMediumValue =
2855 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2856 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2857 Builder.CreateBr(W0MergeBB);
2859 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2860 Builder.CreateBr(W0MergeBB);
2862 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2865 Cnt = Builder.CreateNSWAdd(
2866 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2867 Builder.CreateStore(Cnt, CntAddr,
false);
2869 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2870 emitBranch(PrecondBB);
2871 emitBlock(ExitBB, CurFn);
2873 RealTySize %= TySize;
2877 Builder.CreateRetVoid();
2878 Builder.restoreIP(SavedIP);
// NOTE(review): garbled extraction of
// OpenMPIRBuilder::emitShuffleAndReduceFunction — parameter lists, block
// declarations, and several statements are missing, and original-file line
// numbers are fused into the text. Visible structure: build a helper
// (ptr, i16 lane, i16 remote_offset, i16 algo_ver) that shuffles a remote
// lane's reduction list into a local copy, conditionally reduces it via
// ReduceFn, and conditionally copies the remote list back depending on the
// algorithm version. Recover from upstream before editing.
2883Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2885 AttributeList FuncAttrs) {
2889 {Builder.getPtrTy(), Builder.getInt16Ty(),
2890 Builder.getInt16Ty(), Builder.getInt16Ty()},
2894 "_omp_reduction_shuffle_and_reduce_func", &M);
2904 Builder.SetInsertPoint(EntryBB);
// Spill all four arguments to allocas, address-space-cast, and reload.
2915 Type *ReduceListArgType = ReduceListArg->
getType();
2917 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
2918 Value *ReduceListAlloca = Builder.CreateAlloca(
2919 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2920 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2921 LaneIDArg->
getName() +
".addr");
2922 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
2923 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2924 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2925 AlgoVerArg->
getName() +
".addr");
2931 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
2932 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2934 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2935 ReduceListAlloca, ReduceListArgType,
2936 ReduceListAlloca->
getName() +
".ascast");
2937 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2938 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2939 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2940 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2941 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2942 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2943 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2944 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2945 RemoteReductionListAlloca, Builder.getPtrTy(),
2946 RemoteReductionListAlloca->
getName() +
".ascast");
2948 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2949 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
2950 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
2951 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
2953 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
2954 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
2955 Value *RemoteLaneOffset =
2956 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
2957 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
// Pull the remote lane's reduction list into the local remote-list copy.
2964 emitReductionListCopy(
2965 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
2966 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
// Decide whether this lane performs the reduction (per algorithm version).
2989 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
2990 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
2991 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
2992 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
2993 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
2994 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
2995 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
2996 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
2997 Value *RemoteOffsetComp =
2998 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
2999 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3000 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3001 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3007 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3008 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3009 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3010 ReduceList, Builder.getPtrTy());
3011 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3012 RemoteListAddrCast, Builder.getPtrTy());
3013 Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3014 ->addFnAttr(Attribute::NoUnwind);
3015 Builder.CreateBr(MergeBB);
3017 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3018 Builder.CreateBr(MergeBB);
3020 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
// Algorithm 1: upper half of the warp copies the remote list back.
3024 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3025 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3026 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3031 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3033 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3034 emitReductionListCopy(AllocaIP, CopyAction::ThreadCopy, RedListArrayTy,
3035 ReductionInfos, RemoteListAddrCast, ReduceList);
3036 Builder.CreateBr(CpyMergeBB);
3038 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3039 Builder.CreateBr(CpyMergeBB);
3041 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3043 Builder.CreateRetVoid();
// NOTE(review): garbled extraction of
// OpenMPIRBuilder::emitListToGlobalCopyFunction — parameter lists and
// several statements are missing, and original-file line numbers are fused
// into the text. Visible structure: build a (buffer, idx, reduce_list)
// helper that copies each element of a thread's reduction list into the
// global reductions buffer, dispatching on scalar / complex / aggregate
// evaluation kinds. Recover from upstream before editing.
3048Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3050 AttributeList FuncAttrs) {
3051 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3054 Builder.getVoidTy(),
3055 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3059 "_omp_reduction_list_to_global_copy_func", &M);
3066 Builder.SetInsertPoint(EntryBlock);
// Spill the three arguments, cast to generic address space, and reload.
3075 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3076 BufferArg->
getName() +
".addr");
3077 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3079 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3080 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3081 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3082 BufferArgAlloca, Builder.getPtrTy(),
3083 BufferArgAlloca->
getName() +
".ascast");
3084 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3085 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3086 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3087 ReduceListArgAlloca, Builder.getPtrTy(),
3088 ReduceListArgAlloca->
getName() +
".ascast");
3090 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3091 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3092 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3094 Value *LocalReduceList =
3095 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3096 Value *BufferArgVal =
3097 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3098 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3099 Type *IndexTy = Builder.getIndexTy(
3100 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Copy each reduction element from the list slot to its buffer slot.
3101 for (
auto En :
enumerate(ReductionInfos)) {
3102 const ReductionInfo &RI = En.value();
3103 auto *RedListArrayTy =
3106 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3107 RedListArrayTy, LocalReduceList,
3108 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3110 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3114 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3115 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3116 ReductionsBufferTy, BufferVD, 0, En.index());
3118 switch (RI.EvaluationKind) {
3119 case EvalKind::Scalar: {
3120 Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3121 Builder.CreateStore(TargetElement, GlobVal);
3124 case EvalKind::Complex: {
3125 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3126 RI.ElementType, ElemPtr, 0, 0,
".realp");
3127 Value *SrcReal = Builder.CreateLoad(
3128 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3129 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3130 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3131 Value *SrcImg = Builder.CreateLoad(
3132 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3134 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3135 RI.ElementType, GlobVal, 0, 0,
".realp");
3136 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3137 RI.ElementType, GlobVal, 0, 1,
".imagp");
3138 Builder.CreateStore(SrcReal, DestRealPtr);
3139 Builder.CreateStore(SrcImg, DestImgPtr);
3142 case EvalKind::Aggregate: {
3144 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3145 Builder.CreateMemCpy(
3146 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3147 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3153 Builder.CreateRetVoid();
3154 Builder.restoreIP(OldIP);
// NOTE(review): garbled extraction of
// OpenMPIRBuilder::emitListToGlobalReduceFunction — parameter lists and
// several statements are missing, and original-file line numbers are fused
// into the text. Visible structure: build a (buffer, idx, reduce_list)
// helper that assembles a local reduction list of pointers into the global
// buffer row, then calls ReduceFn to reduce the thread's list into it.
// Recover from upstream before editing.
3158Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3160 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3161 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3164 Builder.getVoidTy(),
3165 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3169 "_omp_reduction_list_to_global_reduce_func", &M);
3176 Builder.SetInsertPoint(EntryBlock);
// Spill the three arguments plus a local reduce-list array, cast, reload.
3185 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3186 BufferArg->
getName() +
".addr");
3187 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3189 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3190 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3191 auto *RedListArrayTy =
3196 Value *LocalReduceList =
3197 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3199 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3200 BufferArgAlloca, Builder.getPtrTy(),
3201 BufferArgAlloca->
getName() +
".ascast");
3202 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3203 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3204 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3205 ReduceListArgAlloca, Builder.getPtrTy(),
3206 ReduceListArgAlloca->
getName() +
".ascast");
3207 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3208 LocalReduceList, Builder.getPtrTy(),
3209 LocalReduceList->
getName() +
".ascast");
3211 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3212 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3213 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3215 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3216 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3217 Type *IndexTy = Builder.getIndexTy(
3218 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Point each local list slot at the matching global-buffer element.
3219 for (
auto En :
enumerate(ReductionInfos)) {
3220 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3221 RedListArrayTy, LocalReduceListAddrCast,
3222 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3224 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3226 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3227 ReductionsBufferTy, BufferVD, 0, En.index());
3228 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3233 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3234 Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3235 ->addFnAttr(Attribute::NoUnwind);
3236 Builder.CreateRetVoid();
3237 Builder.restoreIP(OldIP);
3241Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3243 AttributeList FuncAttrs) {
3244 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3247 Builder.getVoidTy(),
3248 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3252 "_omp_reduction_global_to_list_copy_func", &M);
3259 Builder.SetInsertPoint(EntryBlock);
3268 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3269 BufferArg->
getName() +
".addr");
3270 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3272 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3273 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3274 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3275 BufferArgAlloca, Builder.getPtrTy(),
3276 BufferArgAlloca->
getName() +
".ascast");
3277 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3278 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3279 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3280 ReduceListArgAlloca, Builder.getPtrTy(),
3281 ReduceListArgAlloca->
getName() +
".ascast");
3282 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3283 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3284 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3286 Value *LocalReduceList =
3287 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3288 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3289 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3290 Type *IndexTy = Builder.getIndexTy(
3291 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3292 for (
auto En :
enumerate(ReductionInfos)) {
3293 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3294 auto *RedListArrayTy =
3297 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3298 RedListArrayTy, LocalReduceList,
3299 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3301 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3304 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3305 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3306 ReductionsBufferTy, BufferVD, 0, En.index());
3308 switch (RI.EvaluationKind) {
3309 case EvalKind::Scalar: {
3310 Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
3311 Builder.CreateStore(TargetElement, ElemPtr);
3314 case EvalKind::Complex: {
3315 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3316 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3317 Value *SrcReal = Builder.CreateLoad(
3318 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3319 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3320 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3321 Value *SrcImg = Builder.CreateLoad(
3322 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3324 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3325 RI.ElementType, ElemPtr, 0, 0,
".realp");
3326 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3327 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3328 Builder.CreateStore(SrcReal, DestRealPtr);
3329 Builder.CreateStore(SrcImg, DestImgPtr);
3332 case EvalKind::Aggregate: {
3334 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3335 Builder.CreateMemCpy(
3336 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3337 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3344 Builder.CreateRetVoid();
3345 Builder.restoreIP(OldIP);
3349Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3351 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3352 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3355 Builder.getVoidTy(),
3356 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3360 "_omp_reduction_global_to_list_reduce_func", &M);
3367 Builder.SetInsertPoint(EntryBlock);
3376 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3377 BufferArg->
getName() +
".addr");
3378 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3380 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3381 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3387 Value *LocalReduceList =
3388 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3390 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3391 BufferArgAlloca, Builder.getPtrTy(),
3392 BufferArgAlloca->
getName() +
".ascast");
3393 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3394 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3395 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3396 ReduceListArgAlloca, Builder.getPtrTy(),
3397 ReduceListArgAlloca->
getName() +
".ascast");
3398 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3399 LocalReduceList, Builder.getPtrTy(),
3400 LocalReduceList->
getName() +
".ascast");
3402 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3403 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3404 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3406 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3407 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3408 Type *IndexTy = Builder.getIndexTy(
3409 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3410 for (
auto En :
enumerate(ReductionInfos)) {
3411 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3412 RedListArrayTy, ReductionList,
3413 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3416 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3417 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3418 ReductionsBufferTy, BufferVD, 0, En.index());
3419 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3424 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3425 Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
3426 ->addFnAttr(Attribute::NoUnwind);
3427 Builder.CreateRetVoid();
3428 Builder.restoreIP(OldIP);
3432std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3433 std::string Suffix =
3434 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
3435 return (Name + Suffix).
str();
3440 ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
3442 {Builder.getPtrTy(), Builder.getPtrTy()},
3444 std::string
Name = getReductionFuncName(ReducerName);
3452 Builder.SetInsertPoint(EntryBB);
3456 Value *LHSArrayPtr =
nullptr;
3457 Value *RHSArrayPtr =
nullptr;
3464 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3466 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3467 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3468 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3469 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3470 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3471 Builder.CreateStore(Arg0, LHSAddrCast);
3472 Builder.CreateStore(Arg1, RHSAddrCast);
3473 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3474 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3477 Type *IndexTy = Builder.getIndexTy(
3478 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3480 for (
auto En :
enumerate(ReductionInfos)) {
3481 const ReductionInfo &RI = En.value();
3482 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3483 RedArrayTy, RHSArrayPtr,
3484 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3485 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3486 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3487 RHSI8Ptr, RI.PrivateVariable->getType(),
3488 RHSI8Ptr->
getName() +
".ascast");
3490 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3491 RedArrayTy, LHSArrayPtr,
3492 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3493 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3494 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3495 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3497 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3501 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3502 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3504 InsertPointOrErrorTy AfterIP =
3505 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3507 return AfterIP.takeError();
3508 if (!Builder.GetInsertBlock())
3509 return ReductionFunc;
3511 Builder.restoreIP(*AfterIP);
3512 Builder.CreateStore(Reduced, LHSPtr);
3516 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3517 for (
auto En :
enumerate(ReductionInfos)) {
3518 unsigned Index = En.index();
3519 const ReductionInfo &RI = En.value();
3520 Value *LHSFixupPtr, *RHSFixupPtr;
3521 Builder.restoreIP(RI.ReductionGenClang(
3522 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3527 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3532 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3538 Builder.CreateRetVoid();
3539 return ReductionFunc;
3545 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3547 assert(RI.Variable &&
"expected non-null variable");
3548 assert(RI.PrivateVariable &&
"expected non-null private variable");
3549 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3550 "expected non-null reduction generator callback");
3553 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3554 "expected variables and their private equivalents to have the same "
3557 assert(RI.Variable->getType()->isPointerTy() &&
3558 "expected variables to be pointers");
3562OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
3563 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3565 bool IsNoWait,
bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind,
3566 std::optional<omp::GV> GridValue,
unsigned ReductionBufNum,
3567 Value *SrcLocInfo) {
3568 if (!updateToLocation(
Loc))
3569 return InsertPointTy();
3570 Builder.restoreIP(CodeGenIP);
3577 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3578 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3581 if (ReductionInfos.
size() == 0)
3582 return Builder.saveIP();
3585 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3591 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3595 AttributeList FuncAttrs;
3596 AttrBuilder AttrBldr(Ctx);
3598 AttrBldr.addAttribute(Attr);
3599 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3600 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
3602 CodeGenIP = Builder.saveIP();
3604 createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
3605 ReductionInfos, ReductionGenCBKind, FuncAttrs);
3606 if (!ReductionResult)
3608 Function *ReductionFunc = *ReductionResult;
3609 Builder.restoreIP(CodeGenIP);
3612 if (GridValue.has_value())
3613 Config.setGridValue(GridValue.value());
3628 Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
3630 CodeGenIP = Builder.saveIP();
3631 Builder.restoreIP(AllocaIP);
3632 Value *ReductionListAlloca =
3633 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3634 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3635 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3636 Builder.restoreIP(CodeGenIP);
3637 Type *IndexTy = Builder.getIndexTy(
3638 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3639 for (
auto En :
enumerate(ReductionInfos)) {
3640 const ReductionInfo &RI = En.value();
3641 Value *ElemPtr = Builder.CreateInBoundsGEP(
3642 RedArrayTy, ReductionList,
3643 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3645 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3646 Builder.CreateStore(CastElem, ElemPtr);
3648 CodeGenIP = Builder.saveIP();
3650 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3652 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs);
3656 Builder.restoreIP(CodeGenIP);
3658 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
3660 unsigned MaxDataSize = 0;
3662 for (
auto En :
enumerate(ReductionInfos)) {
3663 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3664 if (
Size > MaxDataSize)
3666 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3668 Value *ReductionDataSize =
3669 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
3670 if (!IsTeamsReduction) {
3671 Value *SarFuncCast =
3672 Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, FuncPtrTy);
3674 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
3675 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3677 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3678 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3679 Res = Builder.CreateCall(Pv2Ptr, Args);
3681 CodeGenIP = Builder.saveIP();
3683 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3684 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3685 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3686 Function *LtGCFunc = emitListToGlobalCopyFunction(
3687 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3688 Function *LtGRFunc = emitListToGlobalReduceFunction(
3689 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3690 Function *GtLCFunc = emitGlobalToListCopyFunction(
3691 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3692 Function *GtLRFunc = emitGlobalToListReduceFunction(
3693 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3694 Builder.restoreIP(CodeGenIP);
3696 Value *KernelTeamsReductionPtr = Builder.CreateCall(
3697 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3699 Value *Args3[] = {SrcLocInfo,
3700 KernelTeamsReductionPtr,
3701 Builder.getInt32(ReductionBufNum),
3711 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3712 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3713 Res = Builder.CreateCall(TeamsReduceFn, Args3);
3719 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3720 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3726 emitBlock(ThenBB, CurFunc);
3729 for (
auto En :
enumerate(ReductionInfos)) {
3730 const ReductionInfo &RI = En.value();
3733 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3735 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3736 Value *LHSPtr, *RHSPtr;
3737 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3738 &LHSPtr, &RHSPtr, CurFunc));
3751 Value *LHSValue = Builder.CreateLoad(RI.ElementType,
LHS,
"final.lhs");
3752 Value *RHSValue = Builder.CreateLoad(RI.ElementType,
RHS,
"final.rhs");
3754 InsertPointOrErrorTy AfterIP =
3755 RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
3757 return AfterIP.takeError();
3758 Builder.restoreIP(*AfterIP);
3759 Builder.CreateStore(Reduced,
LHS,
false);
3762 emitBlock(ExitBB, CurFunc);
3763 if (ContinuationBlock) {
3764 Builder.CreateBr(ContinuationBlock);
3765 Builder.SetInsertPoint(ContinuationBlock);
3767 Config.setEmitLLVMUsed();
3769 return Builder.saveIP();
3778 ".omp.reduction.func", &M);
3788 Builder.SetInsertPoint(ReductionFuncBlock);
3789 Value *LHSArrayPtr =
nullptr;
3790 Value *RHSArrayPtr =
nullptr;
3801 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3803 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3804 Value *LHSAddrCast =
3805 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3806 Value *RHSAddrCast =
3807 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3808 Builder.CreateStore(Arg0, LHSAddrCast);
3809 Builder.CreateStore(Arg1, RHSAddrCast);
3810 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3811 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3813 LHSArrayPtr = ReductionFunc->
getArg(0);
3814 RHSArrayPtr = ReductionFunc->
getArg(1);
3817 unsigned NumReductions = ReductionInfos.
size();
3820 for (
auto En :
enumerate(ReductionInfos)) {
3821 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3822 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3823 RedArrayTy, LHSArrayPtr, 0, En.index());
3824 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3825 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3826 LHSI8Ptr, RI.Variable->
getType());
3827 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3828 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3829 RedArrayTy, RHSArrayPtr, 0, En.index());
3830 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3831 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3832 RHSI8Ptr, RI.PrivateVariable->
getType());
3833 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3835 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3836 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3838 return AfterIP.takeError();
3840 Builder.restoreIP(*AfterIP);
3842 if (!Builder.GetInsertBlock())
3846 if (!IsByRef[En.index()])
3847 Builder.CreateStore(Reduced, LHSPtr);
3849 Builder.CreateRetVoid();
3853OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
3854 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3856 bool IsNoWait,
bool IsTeamsReduction) {
3859 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
3860 IsNoWait, IsTeamsReduction);
3864 if (!updateToLocation(
Loc))
3865 return InsertPointTy();
3867 if (ReductionInfos.
size() == 0)
3868 return Builder.saveIP();
3877 unsigned NumReductions = ReductionInfos.
size();
3879 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
3880 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
3882 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3884 for (
auto En :
enumerate(ReductionInfos)) {
3885 unsigned Index = En.index();
3886 const ReductionInfo &RI = En.value();
3887 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
3888 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3889 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
3894 Type *IndexTy = Builder.getIndexTy(
3895 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3896 Function *
Func = Builder.GetInsertBlock()->getParent();
3899 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3900 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
3901 return RI.AtomicReductionGen;
3903 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
3905 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3907 Value *ThreadId = getOrCreateThreadID(Ident);
3908 Constant *NumVariables = Builder.getInt32(NumReductions);
3910 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3911 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
3913 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3914 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
3915 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3916 : RuntimeFunction::OMPRTL___kmpc_reduce);
3918 Builder.CreateCall(ReduceFunc,
3919 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3920 ReductionFunc, Lock},
3931 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
3932 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
3933 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
3938 Builder.SetInsertPoint(NonAtomicRedBlock);
3939 for (
auto En :
enumerate(ReductionInfos)) {
3940 const ReductionInfo &RI = En.value();
3944 Value *RedValue = RI.Variable;
3945 if (!IsByRef[En.index()]) {
3946 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3947 "red.value." +
Twine(En.index()));
3949 Value *PrivateRedValue =
3950 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
3951 "red.private.value." +
Twine(En.index()));
3953 InsertPointOrErrorTy AfterIP =
3954 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3956 return AfterIP.takeError();
3957 Builder.restoreIP(*AfterIP);
3959 if (!Builder.GetInsertBlock())
3960 return InsertPointTy();
3962 if (!IsByRef[En.index()])
3963 Builder.CreateStore(Reduced, RI.Variable);
3965 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
3966 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3967 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3968 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
3969 Builder.CreateBr(ContinuationBlock);
3974 Builder.SetInsertPoint(AtomicRedBlock);
3975 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3976 for (
const ReductionInfo &RI : ReductionInfos) {
3977 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
3978 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
3980 return AfterIP.takeError();
3981 Builder.restoreIP(*AfterIP);
3982 if (!Builder.GetInsertBlock())
3983 return InsertPointTy();
3985 Builder.CreateBr(ContinuationBlock);
3987 Builder.CreateUnreachable();
3998 if (!Builder.GetInsertBlock())
3999 return InsertPointTy();
4001 Builder.SetInsertPoint(ContinuationBlock);
4002 return Builder.saveIP();
4005OpenMPIRBuilder::InsertPointOrErrorTy
4006OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4007 BodyGenCallbackTy BodyGenCB,
4008 FinalizeCallbackTy FiniCB) {
4009 if (!updateToLocation(
Loc))
4012 Directive OMPD = Directive::OMPD_master;
4014 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4015 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4016 Value *ThreadId = getOrCreateThreadID(Ident);
4019 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4020 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4022 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4023 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
4025 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4029OpenMPIRBuilder::InsertPointOrErrorTy
4030OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4031 BodyGenCallbackTy BodyGenCB,
4033 if (!updateToLocation(
Loc))
4036 Directive OMPD = Directive::OMPD_masked;
4038 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4039 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4040 Value *ThreadId = getOrCreateThreadID(Ident);
4042 Value *ArgsEnd[] = {Ident, ThreadId};
4044 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4045 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4047 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4048 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
4050 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4060 Call->setDoesNotThrow();
4072OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4073 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4075 bool IsInclusive, ScanInfo *ScanRedInfo) {
4076 if (ScanRedInfo->OMPFirstScanLoop) {
4077 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4078 ScanVarsType, ScanRedInfo);
4082 if (!updateToLocation(
Loc))
4087 if (ScanRedInfo->OMPFirstScanLoop) {
4089 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4090 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4091 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4092 Type *DestTy = ScanVarsType[i];
4093 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4094 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4096 Builder.CreateStore(Src, Val);
4099 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4100 emitBlock(ScanRedInfo->OMPScanDispatch,
4101 Builder.GetInsertBlock()->getParent());
4103 if (!ScanRedInfo->OMPFirstScanLoop) {
4104 IV = ScanRedInfo->IV;
4107 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4108 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4109 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4110 Type *DestTy = ScanVarsType[i];
4112 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4113 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4114 Builder.CreateStore(Src, ScanVars[i]);
4120 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4121 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4122 ScanRedInfo->OMPAfterScanBlock);
4124 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4125 ScanRedInfo->OMPBeforeScanBlock);
4127 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4128 Builder.GetInsertBlock()->getParent());
4129 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4130 return Builder.saveIP();
4133Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4137 Builder.restoreIP(AllocaIP);
4139 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4141 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4142 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
4146 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4147 InsertPointTy CodeGenIP) ->
Error {
4148 Builder.restoreIP(CodeGenIP);
4150 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4151 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4155 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4156 AllocSpan,
nullptr,
"arr");
4157 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
4165 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4167 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4168 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4171 return AfterIP.takeError();
4172 Builder.restoreIP(*AfterIP);
4173 BasicBlock *InputBB = Builder.GetInsertBlock();
4175 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4176 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4178 return AfterIP.takeError();
4179 Builder.restoreIP(*AfterIP);
4184Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4186 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4187 InsertPointTy CodeGenIP) ->
Error {
4188 Builder.restoreIP(CodeGenIP);
4189 for (ReductionInfo RedInfo : ReductionInfos) {
4190 Value *PrivateVar = RedInfo.PrivateVariable;
4191 Value *OrigVar = RedInfo.Variable;
4192 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4193 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4195 Type *SrcTy = RedInfo.ElementType;
4196 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4198 Value *Src = Builder.CreateLoad(SrcTy, Val);
4200 Builder.CreateStore(Src, OrigVar);
4201 Builder.CreateFree(Buff);
4209 if (ScanRedInfo->OMPScanFinish->getTerminator())
4210 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4212 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4215 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4216 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4219 return AfterIP.takeError();
4220 Builder.restoreIP(*AfterIP);
4221 BasicBlock *InputBB = Builder.GetInsertBlock();
4223 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4224 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4226 return AfterIP.takeError();
4227 Builder.restoreIP(*AfterIP);
4231OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4232 const LocationDescription &
Loc,
4234 ScanInfo *ScanRedInfo) {
4236 if (!updateToLocation(
Loc))
4238 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4239 InsertPointTy CodeGenIP) ->
Error {
4240 Builder.restoreIP(CodeGenIP);
4246 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
4248 Builder.GetInsertBlock()->getModule(),
4252 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4255 Builder.GetInsertBlock()->getModule(),
4258 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4261 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4262 Builder.SetInsertPoint(InputBB);
4263 Builder.CreateBr(LoopBB);
4264 emitBlock(LoopBB, CurFn);
4265 Builder.SetInsertPoint(LoopBB);
4267 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4269 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4270 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4272 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
4280 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4281 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4282 emitBlock(InnerLoopBB, CurFn);
4283 Builder.SetInsertPoint(InnerLoopBB);
4284 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4286 for (ReductionInfo RedInfo : ReductionInfos) {
4287 Value *ReductionVal = RedInfo.PrivateVariable;
4288 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4289 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4290 Type *DestTy = RedInfo.ElementType;
4291 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4293 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4294 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4296 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4297 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4298 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
4300 InsertPointOrErrorTy AfterIP =
4301 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4303 return AfterIP.takeError();
4304 Builder.CreateStore(Result, LHSPtr);
4307 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4308 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4309 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4310 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4311 emitBlock(InnerExitBB, CurFn);
4313 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
4316 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4317 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4319 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
4329 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4330 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4333 return AfterIP.takeError();
4334 Builder.restoreIP(*AfterIP);
4335 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4338 return AfterIP.takeError();
4339 Builder.restoreIP(*AfterIP);
4340 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
4347Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4350 ScanInfo *ScanRedInfo) {
4358 ScanRedInfo->OMPFirstScanLoop =
true;
4359 Error Err = InputLoopGen();
4369 ScanRedInfo->OMPFirstScanLoop =
false;
4370 Error Err = ScanLoopGen(Builder.saveIP());
4377void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4378 Function *
Fun = Builder.GetInsertBlock()->getParent();
4379 ScanRedInfo->OMPScanDispatch =
4381 ScanRedInfo->OMPAfterScanBlock =
4383 ScanRedInfo->OMPBeforeScanBlock =
4385 ScanRedInfo->OMPScanLoopExit =
4388CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4412 Builder.SetCurrentDebugLocation(
DL);
4414 Builder.SetInsertPoint(Preheader);
4415 Builder.CreateBr(Header);
4417 Builder.SetInsertPoint(Header);
4418 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4419 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4420 Builder.CreateBr(
Cond);
4422 Builder.SetInsertPoint(
Cond);
4424 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4425 Builder.CreateCondBr(Cmp, Body, Exit);
4427 Builder.SetInsertPoint(Body);
4428 Builder.CreateBr(Latch);
4430 Builder.SetInsertPoint(Latch);
4431 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4432 "omp_" + Name +
".next",
true);
4433 Builder.CreateBr(Header);
4436 Builder.SetInsertPoint(Exit);
4437 Builder.CreateBr(After);
4440 LoopInfos.emplace_front();
4441 CanonicalLoopInfo *CL = &LoopInfos.front();
4443 CL->Header = Header;
4455OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4456 LoopBodyGenCallbackTy BodyGenCB,
4461 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4462 NextBB, NextBB, Name);
4466 if (updateToLocation(
Loc)) {
4470 spliceBB(Builder, After,
false);
4471 Builder.CreateBr(CL->getPreheader());
4476 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4486 ScanInfos.emplace_front();
4487 ScanInfo *
Result = &ScanInfos.front();
4492OpenMPIRBuilder::createCanonicalScanLoops(
4493 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4494 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4495 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
4496 LocationDescription ComputeLoc =
4497 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4498 updateToLocation(ComputeLoc);
4502 Value *TripCount = calculateCanonicalLoopTripCount(
4503 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4504 ScanRedInfo->Span = TripCount;
4505 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4506 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
4508 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4509 Builder.restoreIP(CodeGenIP);
4510 ScanRedInfo->IV =
IV;
4511 createScanBBs(ScanRedInfo);
4512 BasicBlock *InputBlock = Builder.GetInsertBlock();
4516 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4517 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4518 Builder.GetInsertBlock()->getParent());
4519 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4520 emitBlock(ScanRedInfo->OMPScanLoopExit,
4521 Builder.GetInsertBlock()->getParent());
4522 Builder.CreateBr(ContinueBlock);
4523 Builder.SetInsertPoint(
4524 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4525 return BodyGenCB(Builder.saveIP(),
IV);
4528 const auto &&InputLoopGen = [&]() ->
Error {
4530 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4531 ComputeIP, Name,
true, ScanRedInfo);
4535 Builder.restoreIP((*LoopInfo)->getAfterIP());
4538 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4540 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4541 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4545 Builder.restoreIP((*LoopInfo)->getAfterIP());
4546 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4549 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
4555Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4557 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
4567 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4568 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4570 updateToLocation(
Loc);
4587 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4588 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4589 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4590 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4591 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
4592 ZeroCmp = Builder.CreateICmp(
4595 Span = Builder.CreateSub(Stop, Start,
"",
true);
4596 ZeroCmp = Builder.CreateICmp(
4600 Value *CountIfLooping;
4601 if (InclusiveStop) {
4602 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
4605 Value *CountIfTwo = Builder.CreateAdd(
4606 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4608 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
4611 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4612 "omp_" + Name +
".tripcount");
4616 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4617 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4618 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4619 ScanInfo *ScanRedInfo) {
4620 LocationDescription ComputeLoc =
4621 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4623 Value *TripCount = calculateCanonicalLoopTripCount(
4624 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4626 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4627 Builder.restoreIP(CodeGenIP);
4628 Value *Span = Builder.CreateMul(
IV, Step);
4629 Value *IndVar = Builder.CreateAdd(Span, Start);
4631 ScanRedInfo->IV = IndVar;
4632 return BodyGenCB(Builder.saveIP(), IndVar);
4634 LocationDescription LoopLoc =
4637 : LocationDescription(Builder.saveIP(),
4638 Builder.getCurrentDebugLocation());
4639 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
4648 OpenMPIRBuilder &OMPBuilder) {
4649 unsigned Bitwidth = Ty->getIntegerBitWidth();
4651 return OMPBuilder.getOrCreateRuntimeFunction(
4652 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4654 return OMPBuilder.getOrCreateRuntimeFunction(
4655 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4664 OpenMPIRBuilder &OMPBuilder) {
4665 unsigned Bitwidth = Ty->getIntegerBitWidth();
4667 return OMPBuilder.getOrCreateRuntimeFunction(
4668 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4670 return OMPBuilder.getOrCreateRuntimeFunction(
4671 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
4675OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4676 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4678 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4680 "Require dedicated allocate IP");
4683 Builder.restoreIP(CLI->getPreheaderIP());
4684 Builder.SetCurrentDebugLocation(
DL);
4687 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4688 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4692 Type *IVTy =
IV->getType();
4694 LoopType == WorksharingLoopType::DistributeForStaticLoop
4698 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4701 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4704 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4705 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
4706 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
4707 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
4708 CLI->setLastIter(PLastIter);
4714 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4716 Constant *One = ConstantInt::get(IVTy, 1);
4717 Builder.CreateStore(Zero, PLowerBound);
4718 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4719 Builder.CreateStore(UpperBound, PUpperBound);
4720 Builder.CreateStore(One, PStride);
4722 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4725 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4726 ? OMPScheduleType::OrderedDistribute
4729 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4734 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4735 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4736 Value *PDistUpperBound =
4737 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4738 Args.push_back(PDistUpperBound);
4741 Builder.CreateCall(StaticInit, Args);
4742 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4743 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4744 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4745 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4746 CLI->setTripCount(TripCount);
4753 Builder.SetInsertPoint(CLI->getBody(),
4754 CLI->getBody()->getFirstInsertionPt());
4755 Builder.SetCurrentDebugLocation(
DL);
4756 return Builder.CreateAdd(OldIV, LowerBound);
4760 Builder.SetInsertPoint(CLI->getExit(),
4761 CLI->getExit()->getTerminator()->getIterator());
4762 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4766 InsertPointOrErrorTy BarrierIP =
4767 createBarrier(LocationDescription(Builder.saveIP(),
DL),
4768 omp::Directive::OMPD_for,
false,
4771 return BarrierIP.takeError();
4774 InsertPointTy AfterIP = CLI->getAfterIP();
4780OpenMPIRBuilder::InsertPointOrErrorTy
4781OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4782 CanonicalLoopInfo *CLI,
4783 InsertPointTy AllocaIP,
4786 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4787 assert(ChunkSize &&
"Chunk size is required");
4789 LLVMContext &Ctx = CLI->getFunction()->getContext();
4791 Value *OrigTripCount = CLI->getTripCount();
4792 Type *IVTy =
IV->getType();
4794 "Max supported tripcount bitwidth is 64 bits");
4796 :
Type::getInt64Ty(Ctx);
4799 Constant *One = ConstantInt::get(InternalIVTy, 1);
4805 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4808 Builder.restoreIP(AllocaIP);
4809 Builder.SetCurrentDebugLocation(
DL);
4810 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4811 Value *PLowerBound =
4812 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
4813 Value *PUpperBound =
4814 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
4815 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
4816 CLI->setLastIter(PLastIter);
4819 Builder.restoreIP(CLI->getPreheaderIP());
4820 Builder.SetCurrentDebugLocation(
DL);
4823 Value *CastedChunkSize =
4824 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
4825 Value *CastedTripCount =
4826 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
4828 Constant *SchedulingType = ConstantInt::get(
4829 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4830 Builder.CreateStore(Zero, PLowerBound);
4831 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
4832 Builder.CreateStore(OrigUpperBound, PUpperBound);
4833 Builder.CreateStore(One, PStride);
4838 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4839 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4840 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4841 Builder.CreateCall(StaticInit,
4843 SchedulingType, PLastIter,
4844 PLowerBound, PUpperBound,
4849 Value *FirstChunkStart =
4850 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
4851 Value *FirstChunkStop =
4852 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
4853 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
4855 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
4856 Value *NextChunkStride =
4857 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
4860 BasicBlock *DispatchEnter = splitBB(Builder,
true);
4861 Value *DispatchCounter;
4866 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
4867 {Builder.saveIP(),
DL},
4868 [&](InsertPointTy BodyIP,
Value *Counter) {
4869 DispatchCounter = Counter;
4872 FirstChunkStart, CastedTripCount, NextChunkStride,
4878 BasicBlock *DispatchBody = DispatchCLI->getBody();
4879 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4880 BasicBlock *DispatchExit = DispatchCLI->getExit();
4881 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4882 DispatchCLI->invalidate();
4890 Builder.restoreIP(CLI->getPreheaderIP());
4891 Builder.SetCurrentDebugLocation(
DL);
4894 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4895 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
4896 Value *IsLastChunk =
4897 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
4898 Value *CountUntilOrigTripCount =
4899 Builder.CreateSub(CastedTripCount, DispatchCounter);
4900 Value *ChunkTripCount = Builder.CreateSelect(
4901 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4902 Value *BackcastedChunkTC =
4903 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
4904 CLI->setTripCount(BackcastedChunkTC);
4909 Value *BackcastedDispatchCounter =
4910 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
4912 Builder.restoreIP(CLI->getBodyIP());
4913 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
4918 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4922 InsertPointOrErrorTy AfterIP =
4923 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
4926 return AfterIP.takeError();
4944 unsigned Bitwidth = Ty->getIntegerBitWidth();
4945 Module &M = OMPBuilder->M;
4947 case WorksharingLoopType::ForStaticLoop:
4949 return OMPBuilder->getOrCreateRuntimeFunction(
4950 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4952 return OMPBuilder->getOrCreateRuntimeFunction(
4953 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4955 case WorksharingLoopType::DistributeStaticLoop:
4957 return OMPBuilder->getOrCreateRuntimeFunction(
4958 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4960 return OMPBuilder->getOrCreateRuntimeFunction(
4961 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4963 case WorksharingLoopType::DistributeForStaticLoop:
4965 return OMPBuilder->getOrCreateRuntimeFunction(
4966 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4968 return OMPBuilder->getOrCreateRuntimeFunction(
4969 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4972 if (Bitwidth != 32 && Bitwidth != 64) {
4984 Function &LoopBodyFn,
bool NoLoop) {
4986 Module &M = OMPBuilder->M;
4995 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4996 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4997 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
4998 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4999 Builder.CreateCall(RTLFn, RealArgs);
5002 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5003 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5004 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5005 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
5008 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5009 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5010 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5011 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5012 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5014 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5017 Builder.CreateCall(RTLFn, RealArgs);
5021 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5026 Value *TripCount = CLI->getTripCount();
5032 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5033 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5038 Builder.restoreIP({Preheader, Preheader->
end()});
5041 Builder.CreateBr(CLI->getExit());
5044 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5047 CleanUpInfo.EntryBB = CLI->getHeader();
5048 CleanUpInfo.ExitBB = CLI->getExit();
5049 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5057 "Expected unique undroppable user of outlined function");
5059 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5061 "Expected outlined function call to be located in loop preheader");
5063 if (OutlinedFnCallInstruction->
arg_size() > 1)
5070 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5072 for (
auto &ToBeDeletedItem : ToBeDeleted)
5073 ToBeDeletedItem->eraseFromParent();
5077OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
5078 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5081 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5082 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5085 OI.OuterAllocaBB = CLI->getPreheader();
5091 OI.OuterAllocaBB = AllocaIP.getBlock();
5094 OI.EntryBB = CLI->getBody();
5095 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5096 "omp.prelatch",
true);
5099 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5103 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5105 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5116 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5127 CLI->getPreheader(),
5136 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5142 CLI->getIndVar()->user_end());
5145 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5146 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5152 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5159 OI.PostOutlineCB = [=, ToBeDeletedVec =
5160 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5164 addOutlineInfo(std::move(OI));
5165 return CLI->getAfterIP();
5168OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5169 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5170 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5171 bool HasSimdModifier,
bool HasMonotonicModifier,
5172 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5174 if (Config.isTargetDevice())
5175 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
5177 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5178 HasNonmonotonicModifier, HasOrderedClause);
5180 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5181 OMPScheduleType::ModifierOrdered;
5182 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5183 case OMPScheduleType::BaseStatic:
5184 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
5186 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5187 NeedsBarrier, ChunkSize);
5189 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5191 case OMPScheduleType::BaseStaticChunked:
5193 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5194 NeedsBarrier, ChunkSize);
5196 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
5199 case OMPScheduleType::BaseRuntime:
5200 case OMPScheduleType::BaseAuto:
5201 case OMPScheduleType::BaseGreedy:
5202 case OMPScheduleType::BaseBalanced:
5203 case OMPScheduleType::BaseSteal:
5204 case OMPScheduleType::BaseGuidedSimd:
5205 case OMPScheduleType::BaseRuntimeSimd:
5207 "schedule type does not support user-defined chunk sizes");
5209 case OMPScheduleType::BaseDynamicChunked:
5210 case OMPScheduleType::BaseGuidedChunked:
5211 case OMPScheduleType::BaseGuidedIterativeChunked:
5212 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5213 case OMPScheduleType::BaseStaticBalancedChunked:
5214 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5215 NeedsBarrier, ChunkSize);
5228 unsigned Bitwidth = Ty->getIntegerBitWidth();
5230 return OMPBuilder.getOrCreateRuntimeFunction(
5231 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5233 return OMPBuilder.getOrCreateRuntimeFunction(
5234 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5244 unsigned Bitwidth = Ty->getIntegerBitWidth();
5246 return OMPBuilder.getOrCreateRuntimeFunction(
5247 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5249 return OMPBuilder.getOrCreateRuntimeFunction(
5250 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5259 unsigned Bitwidth = Ty->getIntegerBitWidth();
5261 return OMPBuilder.getOrCreateRuntimeFunction(
5262 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5264 return OMPBuilder.getOrCreateRuntimeFunction(
5265 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5269OpenMPIRBuilder::InsertPointOrErrorTy
5270OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
5271 InsertPointTy AllocaIP,
5273 bool NeedsBarrier,
Value *Chunk) {
5274 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5276 "Require dedicated allocate IP");
5278 "Require valid schedule type");
5280 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5281 OMPScheduleType::ModifierOrdered;
5284 Builder.SetCurrentDebugLocation(
DL);
5287 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5288 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5292 Type *IVTy =
IV->getType();
5297 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5299 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5300 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5301 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5302 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5303 CLI->setLastIter(PLastIter);
5311 Constant *One = ConstantInt::get(IVTy, 1);
5312 Builder.CreateStore(One, PLowerBound);
5313 Value *UpperBound = CLI->getTripCount();
5314 Builder.CreateStore(UpperBound, PUpperBound);
5315 Builder.CreateStore(One, PStride);
5321 InsertPointTy AfterIP = CLI->getAfterIP();
5329 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5332 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5335 Builder.CreateCall(DynamicInit,
5336 {SrcLoc, ThreadNum, SchedulingType, One,
5337 UpperBound, One, Chunk});
5346 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
5347 PLowerBound, PUpperBound, PStride});
5348 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5351 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
5352 Builder.CreateCondBr(MoreWork, Header, Exit);
5358 PI->setIncomingBlock(0, OuterCond);
5359 PI->setIncomingValue(0, LowerBound);
5364 Br->setSuccessor(0, OuterCond);
5369 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
5370 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
5377 assert(BI->getSuccessor(1) == Exit);
5378 BI->setSuccessor(1, OuterCond);
5382 Builder.SetInsertPoint(&Latch->
back());
5384 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
5389 Builder.SetInsertPoint(&
Exit->back());
5390 InsertPointOrErrorTy BarrierIP =
5391 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5392 omp::Directive::OMPD_for,
false,
5395 return BarrierIP.takeError();
5414 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5419 if (BBsToErase.
count(UseInst->getParent()))
5426 while (BBsToErase.
remove_if(HasRemainingUses)) {
5436 InsertPointTy ComputeIP) {
5437 assert(
Loops.size() >= 1 &&
"At least one loop required");
5438 size_t NumLoops =
Loops.size();
5442 return Loops.front();
5444 CanonicalLoopInfo *Outermost =
Loops.front();
5445 CanonicalLoopInfo *Innermost =
Loops.back();
5446 BasicBlock *OrigPreheader = Outermost->getPreheader();
5447 BasicBlock *OrigAfter = Outermost->getAfter();
5454 Loop->collectControlBlocks(OldControlBBs);
5457 Builder.SetCurrentDebugLocation(
DL);
5458 if (ComputeIP.isSet())
5459 Builder.restoreIP(ComputeIP);
5461 Builder.restoreIP(Outermost->getPreheaderIP());
5465 Value *CollapsedTripCount =
nullptr;
5466 for (CanonicalLoopInfo *L :
Loops) {
5468 "All loops to collapse must be valid canonical loops");
5469 Value *OrigTripCount =
L->getTripCount();
5470 if (!CollapsedTripCount) {
5471 CollapsedTripCount = OrigTripCount;
5476 CollapsedTripCount =
5477 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5481 CanonicalLoopInfo *
Result =
5482 createLoopSkeleton(
DL, CollapsedTripCount,
F,
5483 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5489 Builder.restoreIP(
Result->getBodyIP());
5493 NewIndVars.
resize(NumLoops);
5494 for (
int i = NumLoops - 1; i >= 1; --i) {
5495 Value *OrigTripCount =
Loops[i]->getTripCount();
5497 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5498 NewIndVars[i] = NewIndVar;
5500 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5503 NewIndVars[0] = Leftover;
5514 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5521 ContinueBlock =
nullptr;
5522 ContinuePred = NextSrc;
5529 for (
size_t i = 0; i < NumLoops - 1; ++i)
5530 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5533 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5536 for (
size_t i = NumLoops - 1; i > 0; --i)
5537 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5540 ContinueWith(
Result->getLatch(),
nullptr);
5547 for (
size_t i = 0; i < NumLoops; ++i)
5548 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5553 for (CanonicalLoopInfo *L :
Loops)
5562std::vector<CanonicalLoopInfo *>
5566 "Must pass as many tile sizes as there are loops");
5567 int NumLoops =
Loops.size();
5568 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5570 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5571 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5572 Function *
F = OutermostLoop->getBody()->getParent();
5573 BasicBlock *InnerEnter = InnermostLoop->getBody();
5574 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5580 Loop->collectControlBlocks(OldControlBBs);
5587 for (CanonicalLoopInfo *L :
Loops) {
5588 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5600 for (
int i = 0; i < NumLoops - 1; ++i) {
5601 CanonicalLoopInfo *Surrounding =
Loops[i];
5604 BasicBlock *EnterBB = Surrounding->getBody();
5610 Builder.SetCurrentDebugLocation(
DL);
5611 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5613 for (
int i = 0; i < NumLoops; ++i) {
5615 Value *OrigTripCount = OrigTripCounts[i];
5618 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5619 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5628 Value *FloorTripOverflow =
5629 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5631 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5632 Value *FloorTripCount =
5633 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5634 "omp_floor" +
Twine(i) +
".tripcount",
true);
5637 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5643 std::vector<CanonicalLoopInfo *>
Result;
5644 Result.reserve(NumLoops * 2);
5648 BasicBlock *Enter = OutermostLoop->getPreheader();
5655 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5657 auto EmbeddNewLoop =
5658 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5660 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5661 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
5666 Enter = EmbeddedLoop->getBody();
5667 Continue = EmbeddedLoop->getLatch();
5668 OutroInsertBefore = EmbeddedLoop->getLatch();
5669 return EmbeddedLoop;
5673 const Twine &NameBase) {
5675 CanonicalLoopInfo *EmbeddedLoop =
5676 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5677 Result.push_back(EmbeddedLoop);
5681 EmbeddNewLoops(FloorCount,
"floor");
5685 Builder.SetInsertPoint(Enter->getTerminator());
5687 for (
int i = 0; i < NumLoops; ++i) {
5688 CanonicalLoopInfo *FloorLoop =
Result[i];
5691 Value *FloorIsEpilogue =
5692 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5693 Value *TileTripCount =
5694 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
5700 EmbeddNewLoops(TileCounts,
"tile");
5705 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5714 BodyEnter =
nullptr;
5715 BodyEntered = ExitBB;
5727 Builder.restoreIP(
Result.back()->getBodyIP());
5728 for (
int i = 0; i < NumLoops; ++i) {
5729 CanonicalLoopInfo *FloorLoop =
Result[i];
5730 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
5731 Value *OrigIndVar = OrigIndVars[i];
5735 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
5737 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
5744 for (CanonicalLoopInfo *L :
Loops)
5748 for (CanonicalLoopInfo *GenL : Result)
5759 if (Properties.
empty())
5782 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5786 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5794 if (
I.mayReadOrWriteMemory()) {
5798 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5803void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
5810void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
5818void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
5821 const Twine &NamePrefix) {
5822 Function *
F = CanonicalLoop->getFunction();
5844 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
5850 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
5852 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
5855 Builder.SetInsertPoint(SplitBeforeIt);
5857 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
5860 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
5863 Builder.SetInsertPoint(ElseBlock);
5869 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
5871 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
5877 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
5879 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
5886 if (
Block == ThenBlock)
5887 NewBB->
setName(NamePrefix +
".if.else");
5890 VMap[
Block] = NewBB;
5894 Builder.CreateBr(NewBlocks.
front());
5898 L->getLoopLatch()->splitBasicBlock(
5899 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
5903 L->addBasicBlockToLoop(ThenBlock, LI);
5907OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
5909 if (TargetTriple.
isX86()) {
5910 if (Features.
lookup(
"avx512f"))
5912 else if (Features.
lookup(
"avx"))
5916 if (TargetTriple.
isPPC())
5918 if (TargetTriple.
isWasm())
5923void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
5925 Value *IfCond, OrderKind Order,
5929 Function *
F = CanonicalLoop->getFunction();
5944 if (AlignedVars.
size()) {
5945 InsertPointTy IP = Builder.saveIP();
5946 for (
auto &AlignedItem : AlignedVars) {
5947 Value *AlignedPtr = AlignedItem.first;
5948 Value *Alignment = AlignedItem.second;
5951 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
5954 Builder.restoreIP(IP);
5959 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
5969 if (
Block == CanonicalLoop->getCond() ||
5970 Block == CanonicalLoop->getHeader())
5972 Reachable.insert(
Block);
5982 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5990 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
6006 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6008 if (Simdlen || Safelen) {
6012 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6038static std::unique_ptr<TargetMachine>
6042 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6043 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6054 std::nullopt, OptLevel));
6078 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6079 FAM.registerPass([&]() {
return TIRA; });
6093 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6098 nullptr, ORE,
static_cast<int>(OptLevel),
6119 <<
" Threshold=" << UP.
Threshold <<
"\n"
6122 <<
" PartialOptSizeThreshold="
6142 Ptr = Load->getPointerOperand();
6144 Ptr = Store->getPointerOperand();
6148 Ptr =
Ptr->stripPointerCasts();
6151 if (Alloca->getParent() == &
F->getEntryBlock())
6171 int MaxTripCount = 0;
6172 bool MaxOrZero =
false;
6173 unsigned TripMultiple = 0;
6175 bool UseUpperBound =
false;
6177 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6179 unsigned Factor = UP.
Count;
6180 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6188void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6190 CanonicalLoopInfo **UnrolledCLI) {
6191 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6207 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6220 *UnrolledCLI =
Loop;
6225 "unrolling only makes sense with a factor of 2 or larger");
6227 Type *IndVarTy =
Loop->getIndVarType();
6234 std::vector<CanonicalLoopInfo *>
LoopNest =
6235 tileLoops(
DL, {
Loop}, {FactorVal});
6238 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6249 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6252 (*UnrolledCLI)->assertOK();
6256OpenMPIRBuilder::InsertPointTy
6257OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
6260 if (!updateToLocation(
Loc))
6264 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6265 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6266 Value *ThreadId = getOrCreateThreadID(Ident);
6268 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6270 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6272 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6273 Builder.CreateCall(Fn, Args);
6275 return Builder.saveIP();
6278OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6279 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6283 if (!updateToLocation(
Loc))
6289 if (!CPVars.
empty()) {
6291 Builder.CreateStore(Builder.getInt32(0), DidIt);
6294 Directive OMPD = Directive::OMPD_single;
6296 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6297 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6298 Value *ThreadId = getOrCreateThreadID(Ident);
6301 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6302 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6304 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6305 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6307 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6308 if (
Error Err = FiniCB(IP))
6315 Builder.CreateStore(Builder.getInt32(1), DidIt);
6328 InsertPointOrErrorTy AfterIP =
6329 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6333 return AfterIP.takeError();
6336 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
6338 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
6339 ConstantInt::get(
Int64, 0), CPVars[
I],
6342 }
else if (!IsNowait) {
6343 InsertPointOrErrorTy AfterIP =
6344 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
6345 omp::Directive::OMPD_unknown,
false,
6348 return AfterIP.takeError();
6350 return Builder.saveIP();
6353OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6354 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6355 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
6357 if (!updateToLocation(
Loc))
6360 Directive OMPD = Directive::OMPD_critical;
6362 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6363 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6364 Value *ThreadId = getOrCreateThreadID(Ident);
6365 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6366 Value *
Args[] = {Ident, ThreadId, LockVar};
6372 EnterArgs.push_back(HintInst);
6373 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6375 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6377 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
6380 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6381 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6383 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6387OpenMPIRBuilder::InsertPointTy
6388OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
6389 InsertPointTy AllocaIP,
unsigned NumLoops,
6391 const Twine &Name,
bool IsDependSource) {
6395 "OpenMP runtime requires depend vec with i64 type");
6397 if (!updateToLocation(
Loc))
6402 Builder.restoreIP(AllocaIP);
6403 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
6405 updateToLocation(
Loc);
6408 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6409 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6410 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
6411 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
6415 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6416 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6419 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6420 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6421 Value *ThreadId = getOrCreateThreadID(Ident);
6422 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6426 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6428 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6429 Builder.CreateCall(RTLFn, Args);
6431 return Builder.saveIP();
6434OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6435 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6436 FinalizeCallbackTy FiniCB,
bool IsThreads) {
6437 if (!updateToLocation(
Loc))
6440 Directive OMPD = Directive::OMPD_ordered;
6446 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6447 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6448 Value *ThreadId = getOrCreateThreadID(Ident);
6451 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6452 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6455 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6456 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6459 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6463OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6465 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6466 bool HasFinalize,
bool IsCancellable) {
6469 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6473 BasicBlock *EntryBB = Builder.GetInsertBlock();
6482 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6485 if (
Error Err = BodyGenCB( InsertPointTy(),
6493 "Unexpected control flow graph state!!");
6494 InsertPointOrErrorTy AfterIP =
6495 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6497 return AfterIP.takeError();
6499 "Unexpected Control Flow State!");
6505 "Unexpected Insertion point location!");
6508 auto InsertBB = merged ? ExitPredBB : ExitBB;
6511 Builder.SetInsertPoint(InsertBB);
6513 return Builder.saveIP();
6516OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
6519 if (!Conditional || !EntryCall)
6520 return Builder.saveIP();
6522 BasicBlock *EntryBB = Builder.GetInsertBlock();
6523 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6535 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
6537 Builder.SetInsertPoint(UI);
6538 Builder.Insert(EntryBBTI);
6539 UI->eraseFromParent();
6546OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6547 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6550 Builder.restoreIP(FinIP);
6554 assert(!FinalizationStack.empty() &&
6555 "Unexpected finalization stack state!");
6557 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6558 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6560 if (
Error Err = Fi.FiniCB(FinIP))
6567 Builder.SetInsertPoint(FiniBBTI);
6571 return Builder.saveIP();
6575 Builder.Insert(ExitCall);
6581OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6582 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
6611 "copyin.not.master.end");
6618 Builder.SetInsertPoint(OMP_Entry);
6619 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6620 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6621 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6622 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
6624 Builder.SetInsertPoint(CopyBegin);
6626 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6628 return Builder.saveIP();
6631CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
6635 updateToLocation(
Loc);
6638 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6639 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6640 Value *ThreadId = getOrCreateThreadID(Ident);
6643 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6645 return Builder.CreateCall(Fn, Args, Name);
6648CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
6652 updateToLocation(
Loc);
6655 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6656 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6657 Value *ThreadId = getOrCreateThreadID(Ident);
6659 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6660 return Builder.CreateCall(Fn, Args, Name);
6663CallInst *OpenMPIRBuilder::createOMPInteropInit(
6664 const LocationDescription &
Loc,
Value *InteropVar,
6666 Value *DependenceAddress,
bool HaveNowaitClause) {
6668 updateToLocation(
Loc);
6671 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6672 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6673 Value *ThreadId = getOrCreateThreadID(Ident);
6674 if (Device ==
nullptr)
6676 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
6677 if (NumDependences ==
nullptr) {
6678 NumDependences = ConstantInt::get(
Int32, 0);
6682 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6684 Ident, ThreadId, InteropVar, InteropTypeVal,
6685 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6687 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6689 return Builder.CreateCall(Fn, Args);
6692CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6693 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
6694 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6696 updateToLocation(
Loc);
6699 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6700 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6701 Value *ThreadId = getOrCreateThreadID(Ident);
6702 if (Device ==
nullptr)
6704 if (NumDependences ==
nullptr) {
6705 NumDependences = ConstantInt::get(
Int32, 0);
6709 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6711 Ident, ThreadId, InteropVar,
Device,
6712 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6714 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6716 return Builder.CreateCall(Fn, Args);
6719CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
6721 Value *NumDependences,
6722 Value *DependenceAddress,
6723 bool HaveNowaitClause) {
6725 updateToLocation(
Loc);
6727 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6728 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6729 Value *ThreadId = getOrCreateThreadID(Ident);
6730 if (Device ==
nullptr)
6732 if (NumDependences ==
nullptr) {
6733 NumDependences = ConstantInt::get(
Int32, 0);
6737 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6739 Ident, ThreadId, InteropVar,
Device,
6740 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6742 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6744 return Builder.CreateCall(Fn, Args);
6747CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6751 updateToLocation(
Loc);
6754 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6755 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6756 Value *ThreadId = getOrCreateThreadID(Ident);
6758 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
6762 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6764 return Builder.CreateCall(Fn, Args);
6767OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6768 const LocationDescription &
Loc,
6769 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6771 "expected num_threads and num_teams to be specified");
6773 if (!updateToLocation(
Loc))
6777 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6778 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6790 const std::string DebugPrefix =
"_debug__";
6791 if (KernelName.
ends_with(DebugPrefix)) {
6792 KernelName = KernelName.
drop_back(DebugPrefix.length());
6793 Kernel = M.getFunction(KernelName);
6799 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
6804 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
6805 if (MaxThreadsVal < 0)
6806 MaxThreadsVal = std::max(
6809 if (MaxThreadsVal > 0)
6810 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
6821 Function *Fn = getOrCreateRuntimeFunctionPtr(
6822 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6825 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6826 Constant *DynamicEnvironmentInitializer =
6830 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6832 DL.getDefaultGlobalsAddressSpace());
6836 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6837 ? DynamicEnvironmentGV
6839 DynamicEnvironmentPtr);
6842 ConfigurationEnvironment, {
6843 UseGenericStateMachineVal,
6844 MayUseNestedParallelismVal,
6851 ReductionBufferLength,
6854 KernelEnvironment, {
6855 ConfigurationEnvironmentInitializer,
6859 std::string KernelEnvironmentName =
6860 (KernelName +
"_kernel_environment").str();
6863 KernelEnvironmentInitializer, KernelEnvironmentName,
6865 DL.getDefaultGlobalsAddressSpace());
6869 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6870 ? KernelEnvironmentGV
6872 KernelEnvironmentPtr);
6873 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6875 KernelLaunchEnvironment =
6876 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
6877 ? KernelLaunchEnvironment
6878 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
6879 KernelLaunchEnvParamTy);
6881 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
6883 Value *ExecUserCode = Builder.CreateICmpEQ(
6893 auto *UI = Builder.CreateUnreachable();
6899 Builder.SetInsertPoint(WorkerExitBB);
6900 Builder.CreateRetVoid();
6903 Builder.SetInsertPoint(CheckBBTI);
6904 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6907 UI->eraseFromParent();
6914void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
6915 int32_t TeamsReductionDataSize,
6916 int32_t TeamsReductionBufferLength) {
6917 if (!updateToLocation(
Loc))
6920 Function *Fn = getOrCreateRuntimeFunctionPtr(
6921 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6923 Builder.CreateCall(Fn, {});
6925 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6931 const std::string DebugPrefix =
"_debug__";
6933 KernelName = KernelName.
drop_back(DebugPrefix.length());
6934 auto *KernelEnvironmentGV =
6935 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
6936 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6937 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
6939 KernelEnvironmentInitializer,
6940 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6942 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6949 if (
Kernel.hasFnAttribute(Name)) {
6950 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
6956std::pair<int32_t, int32_t>
6958 int32_t ThreadLimit =
6959 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
6962 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
6963 if (!Attr.isValid() || !Attr.isStringAttribute())
6964 return {0, ThreadLimit};
6965 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
6968 return {0, ThreadLimit};
6969 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6975 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
6976 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
6977 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6979 return {0, ThreadLimit};
6982void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
6985 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
6988 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
6996std::pair<int32_t, int32_t>
6999 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7003 int32_t LB, int32_t UB) {
7010 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7013void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7015 if (Config.isTargetDevice()) {
7022 else if (
T.isNVPTX())
7024 else if (
T.isSPIRV())
7031 if (Config.isTargetDevice()) {
7032 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7041Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7046 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7047 "Named kernel already exists?");
7053Error OpenMPIRBuilder::emitTargetRegionFunction(
7054 TargetRegionEntryInfo &EntryInfo,
7055 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7059 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7061 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7065 OutlinedFn = *CBResult;
7067 OutlinedFn =
nullptr;
7073 if (!IsOffloadEntry)
7076 std::string EntryFnIDName =
7077 Config.isTargetDevice()
7078 ? std::string(EntryFnName)
7079 : createPlatformSpecificName({EntryFnName,
"region_id"});
7081 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7082 EntryFnName, EntryFnIDName);
7086Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7087 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7090 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7091 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7092 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7093 OffloadInfoManager.registerTargetRegionEntryInfo(
7094 EntryInfo, EntryAddr, OutlinedFnID,
7095 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7096 return OutlinedFnID;
7099OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7100 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7101 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7102 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7104 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7105 BodyGenTy BodyGenType)>
7108 if (!updateToLocation(
Loc))
7109 return InsertPointTy();
7111 Builder.restoreIP(CodeGenIP);
7113 if (Config.IsTargetDevice.value_or(
false)) {
7115 InsertPointOrErrorTy AfterIP =
7116 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7118 return AfterIP.takeError();
7119 Builder.restoreIP(*AfterIP);
7121 return Builder.saveIP();
7124 bool IsStandAlone = !BodyGenCB;
7125 MapInfosTy *MapInfo;
7129 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7130 InsertPointTy CodeGenIP) ->
Error {
7131 MapInfo = &GenMapInfoCB(Builder.saveIP());
7132 if (
Error Err = emitOffloadingArrays(
7133 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7134 true, DeviceAddrCB))
7137 TargetDataRTArgs RTArgs;
7138 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7141 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7146 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7147 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7151 SrcLocInfo, DeviceID,
7152 PointerNum, RTArgs.BasePointersArray,
7153 RTArgs.PointersArray, RTArgs.SizesArray,
7154 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7155 RTArgs.MappersArray};
7158 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7162 if (
Info.HasNoWait) {
7169 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7172 if (
Info.HasNoWait) {
7176 emitBlock(OffloadContBlock, CurFn,
true);
7177 Builder.restoreIP(Builder.saveIP());
7182 bool RequiresOuterTargetTask =
Info.HasNoWait;
7183 if (!RequiresOuterTargetTask)
7184 cantFail(TaskBodyCB(
nullptr,
nullptr,
7187 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7188 {}, RTArgs,
Info.HasNoWait));
7190 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7191 omp::OMPRTL___tgt_target_data_begin_mapper);
7193 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
7195 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7198 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7199 Builder.CreateStore(LI, DeviceMap.second.second);
7206 InsertPointOrErrorTy AfterIP =
7207 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7209 return AfterIP.takeError();
7210 Builder.restoreIP(*AfterIP);
7218 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7219 InsertPointTy CodeGenIP) ->
Error {
7220 InsertPointOrErrorTy AfterIP =
7221 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7223 return AfterIP.takeError();
7224 Builder.restoreIP(*AfterIP);
7229 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7230 TargetDataRTArgs RTArgs;
7231 Info.EmitDebug = !MapInfo->Names.empty();
7232 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7235 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7240 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7241 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7244 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7245 PointerNum, RTArgs.BasePointersArray,
7246 RTArgs.PointersArray, RTArgs.SizesArray,
7247 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7248 RTArgs.MappersArray};
7250 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7252 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
7258 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7266 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7267 return BeginThenGen(AllocaIP, Builder.saveIP());
7275 InsertPointOrErrorTy AfterIP =
7276 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7278 return AfterIP.takeError();
7282 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7283 return EndThenGen(AllocaIP, Builder.saveIP());
7286 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7287 return BeginThenGen(AllocaIP, Builder.saveIP());
7293 return Builder.saveIP();
7297OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7298 bool IsGPUDistribute) {
7299 assert((IVSize == 32 || IVSize == 64) &&
7300 "IV size is not compatible with the omp runtime");
7302 if (IsGPUDistribute)
7304 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7305 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7306 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7307 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7309 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7310 : omp::OMPRTL___kmpc_for_static_init_4u)
7311 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7312 : omp::OMPRTL___kmpc_for_static_init_8u);
7314 return getOrCreateRuntimeFunction(M, Name);
7317FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
7319 assert((IVSize == 32 || IVSize == 64) &&
7320 "IV size is not compatible with the omp runtime");
7322 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7323 : omp::OMPRTL___kmpc_dispatch_init_4u)
7324 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7325 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7327 return getOrCreateRuntimeFunction(M, Name);
7330FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
7332 assert((IVSize == 32 || IVSize == 64) &&
7333 "IV size is not compatible with the omp runtime");
7335 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7336 : omp::OMPRTL___kmpc_dispatch_next_4u)
7337 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7338 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7340 return getOrCreateRuntimeFunction(M, Name);
7343FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
7345 assert((IVSize == 32 || IVSize == 64) &&
7346 "IV size is not compatible with the omp runtime");
7348 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7349 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7350 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7351 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7353 return getOrCreateRuntimeFunction(M, Name);
7357 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7362 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7370 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7374 if (NewVar && (arg == NewVar->
getArg()))
7384 auto UpdateDebugRecord = [&](
auto *DR) {
7387 for (
auto Loc : DR->location_ops()) {
7388 auto Iter = ValueReplacementMap.find(
Loc);
7389 if (Iter != ValueReplacementMap.end()) {
7390 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7391 ArgNo = std::get<1>(Iter->second) + 1;
7395 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7402 "Unexpected debug intrinsic");
7404 UpdateDebugRecord(&DVR);
7407 if (OMPBuilder.Config.isTargetDevice()) {
7409 Module *M = Func->getParent();
7412 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7414 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7415 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7417 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7430 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7432 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7433 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7435 if (OMPBuilder.Config.isTargetDevice()) {
7443 for (
auto &Arg : Inputs)
7448 for (
auto &Arg : Inputs)
7452 auto BB = Builder.GetInsertBlock();
7464 if (TargetCpuAttr.isStringAttribute())
7465 Func->addFnAttr(TargetCpuAttr);
7467 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7468 if (TargetFeaturesAttr.isStringAttribute())
7469 Func->addFnAttr(TargetFeaturesAttr);
7471 if (OMPBuilder.Config.isTargetDevice()) {
7473 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7474 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7485 Builder.SetInsertPoint(EntryBB);
7488 if (OMPBuilder.Config.isTargetDevice())
7489 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7491 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7496 if (OMPBuilder.Config.isTargetDevice())
7497 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7501 splitBB(Builder,
true,
"outlined.body");
7502 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7504 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7506 return AfterIP.takeError();
7507 Builder.restoreIP(*AfterIP);
7508 if (OMPBuilder.Config.isTargetDevice())
7509 OMPBuilder.createTargetDeinit(Builder);
7512 Builder.CreateRetVoid();
7516 auto AllocaIP = Builder.saveIP();
7521 const auto &ArgRange =
7522 OMPBuilder.Config.isTargetDevice()
7523 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7556 if (Instr->getFunction() == Func)
7557 Instr->replaceUsesOfWith(
Input, InputCopy);
7563 for (
auto InArg :
zip(Inputs, ArgRange)) {
7565 Argument &Arg = std::get<1>(InArg);
7566 Value *InputCopy =
nullptr;
7568 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7569 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7571 return AfterIP.takeError();
7572 Builder.restoreIP(*AfterIP);
7573 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7593 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7600 ReplaceValue(
Input, InputCopy, Func);
7604 for (
auto Deferred : DeferredReplacement)
7605 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7608 ValueReplacementMap);
7616 Value *TaskWithPrivates,
7617 Type *TaskWithPrivatesTy) {
7619 Type *TaskTy = OMPIRBuilder.Task;
7622 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7623 Value *Shareds = TaskT;
7633 if (TaskWithPrivatesTy != TaskTy)
7634 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7651 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7656 assert((!NumOffloadingArrays || PrivatesTy) &&
7657 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7660 Module &M = OMPBuilder.M;
7684 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7690 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7691 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7697 ".omp_target_task_proxy_func",
7698 Builder.GetInsertBlock()->getModule());
7699 Value *ThreadId = ProxyFn->getArg(0);
7700 Value *TaskWithPrivates = ProxyFn->getArg(1);
7701 ThreadId->
setName(
"thread.id");
7702 TaskWithPrivates->
setName(
"task");
7704 bool HasShareds = SharedArgsOperandNo > 0;
7705 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7708 Builder.SetInsertPoint(EntryBB);
7714 if (HasOffloadingArrays) {
7715 assert(TaskTy != TaskWithPrivatesTy &&
7716 "If there are offloading arrays to pass to the target"
7717 "TaskTy cannot be the same as TaskWithPrivatesTy");
7720 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7721 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7723 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7727 auto *ArgStructAlloca =
7729 assert(ArgStructAlloca &&
7730 "Unable to find the alloca instruction corresponding to arguments "
7731 "for extracted function");
7735 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
7737 Value *SharedsSize =
7738 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7741 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7743 Builder.CreateMemCpy(
7744 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7746 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7748 Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7749 Builder.CreateRetVoid();
7755 return GEP->getSourceElementType();
7757 return Alloca->getAllocatedType();
7780 if (OffloadingArraysToPrivatize.
empty())
7781 return OMPIRBuilder.Task;
7784 for (
Value *V : OffloadingArraysToPrivatize) {
7785 assert(V->getType()->isPointerTy() &&
7786 "Expected pointer to array to privatize. Got a non-pointer value "
7789 assert(ArrayTy &&
"ArrayType cannot be nullptr");
7795 "struct.task_with_privates");
7798 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
7799 TargetRegionEntryInfo &EntryInfo,
7800 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7803 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7804 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7806 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
7809 EntryFnName, Inputs, CBFunc,
7813 return OMPBuilder.emitTargetRegionFunction(
7814 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7818OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7819 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
7820 OpenMPIRBuilder::InsertPointTy AllocaIP,
7822 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
7946 splitBB(Builder,
true,
"target.task.body");
7948 splitBB(Builder,
true,
"target.task.alloca");
7950 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
7951 TargetTaskAllocaBB->
begin());
7952 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
7955 OI.EntryBB = TargetTaskAllocaBB;
7956 OI.OuterAllocaBB = AllocaIP.getBlock();
7961 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7964 Builder.restoreIP(TargetTaskBodyIP);
7965 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7979 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
7983 bool NeedsTargetTask = HasNoWait && DeviceID;
7984 if (NeedsTargetTask) {
7986 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
7987 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
7988 RTArgs.SizesArray}) {
7990 OffloadingArraysToPrivatize.
push_back(V);
7991 OI.ExcludeArgsFromAggregate.push_back(V);
7995 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
7996 DeviceID, OffloadingArraysToPrivatize](
7999 "there must be a single user for the outlined function");
8013 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8014 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8016 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8017 "Wrong number of arguments for StaleCI when shareds are present");
8018 int SharedArgOperandNo =
8019 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8025 if (!OffloadingArraysToPrivatize.
empty())
8030 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8031 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8033 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8036 Builder.SetInsertPoint(StaleCI);
8041 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8042 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8051 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8052 : getOrCreateRuntimeFunctionPtr(
8053 OMPRTL___kmpc_omp_target_task_alloc);
8057 Value *ThreadID = getOrCreateThreadID(Ident);
8064 Value *TaskSize = Builder.getInt64(
8065 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8070 Value *SharedsSize = Builder.getInt64(0);
8072 auto *ArgStructAlloca =
8074 assert(ArgStructAlloca &&
8075 "Unable to find the alloca instruction corresponding to arguments "
8076 "for extracted function");
8077 auto *ArgStructType =
8079 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8080 "arguments for extracted function");
8082 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8091 Value *Flags = Builder.getInt32(0);
8101 TaskSize, SharedsSize,
8104 if (NeedsTargetTask) {
8105 assert(DeviceID &&
"Expected non-empty device ID.");
8109 TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs);
8115 *
this, Builder, TaskData, TaskWithPrivatesTy);
8116 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8119 if (!OffloadingArraysToPrivatize.
empty()) {
8121 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8122 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8123 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8130 "ElementType should match ArrayType");
8133 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8134 Builder.CreateMemCpy(
8135 Dst, Alignment, PtrToPrivatize, Alignment,
8136 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8150 if (!NeedsTargetTask) {
8153 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8157 Builder.getInt32(Dependencies.size()),
8159 ConstantInt::get(Builder.getInt32Ty(), 0),
8165 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8167 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8168 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8169 CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
8171 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8172 }
else if (DepArray) {
8177 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8180 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8181 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8185 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8186 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
8191 I->eraseFromParent();
8193 addOutlineInfo(std::move(OI));
8196 << *(Builder.GetInsertBlock()) <<
"\n");
8198 << *(Builder.GetInsertBlock()->getParent()->getParent())
8200 return Builder.saveIP();
8203Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8204 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8205 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8206 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8209 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8210 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8212 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8218 OpenMPIRBuilder::InsertPointTy AllocaIP,
8219 OpenMPIRBuilder::TargetDataInfo &
Info,
8220 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8221 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8224 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8225 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8231 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8232 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8233 Builder.restoreIP(IP);
8234 Builder.CreateCall(OutlinedFn, Args);
8235 return Builder.saveIP();
8238 bool HasDependencies = Dependencies.
size() > 0;
8239 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8241 OpenMPIRBuilder::TargetKernelArgs KArgs;
8248 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8256 if (OutlinedFnID && DeviceID)
8257 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8258 EmitTargetCallFallbackCB, KArgs,
8259 DeviceID, RTLoc, TargetTaskAllocaIP);
8267 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8270 OMPBuilder.Builder.restoreIP(AfterIP);
8274 auto &&EmitTargetCallElse =
8275 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8276 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8279 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8280 if (RequiresOuterTargetTask) {
8284 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8285 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8287 Dependencies, EmptyRTArgs, HasNoWait);
8289 return EmitTargetCallFallbackCB(Builder.saveIP());
8292 Builder.restoreIP(AfterIP);
8296 auto &&EmitTargetCallThen =
8297 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8298 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8299 Info.HasNoWait = HasNoWait;
8300 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8301 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8302 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8303 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8310 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8311 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8316 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8318 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8322 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8325 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8333 Value *MaxThreadsClause =
8334 RuntimeAttrs.TeamsThreadLimit.size() == 1
8335 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8338 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8339 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8340 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8341 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8343 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8344 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8346 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8349 unsigned NumTargetItems =
Info.NumberOfPtrs;
8353 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8354 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8357 Value *TripCount = RuntimeAttrs.LoopTripCount
8358 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8359 Builder.getInt64Ty(),
8361 : Builder.getInt64(0);
8364 Value *DynCGGroupMem = Builder.getInt32(0);
8366 KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount,
8367 NumTeamsC, NumThreadsC,
8368 DynCGGroupMem, HasNoWait);
8372 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8375 if (RequiresOuterTargetTask)
8376 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8377 Dependencies, KArgs.RTArgs,
8380 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8381 EmitTargetCallFallbackCB, KArgs,
8382 DeviceID, RTLoc, AllocaIP);
8385 Builder.restoreIP(AfterIP);
8392 if (!OutlinedFnID) {
8393 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8399 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8403 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8404 EmitTargetCallElse, AllocaIP));
8407OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8408 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8409 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8410 TargetRegionEntryInfo &EntryInfo,
8411 const TargetKernelDefaultAttrs &DefaultAttrs,
8412 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8414 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8415 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8416 CustomMapperCallbackTy CustomMapperCB,
8419 if (!updateToLocation(
Loc))
8420 return InsertPointTy();
8422 Builder.restoreIP(CodeGenIP);
8430 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8431 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8437 if (!Config.isTargetDevice())
8439 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8440 CustomMapperCB, Dependencies, HasNowait);
8441 return Builder.saveIP();
8454 return OS.
str().str();
8459 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8460 Config.separator());
8464OpenMPIRBuilder::getOrCreateInternalVariable(
Type *Ty,
const StringRef &Name,
8466 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8468 assert(Elem.second->getValueType() == Ty &&
8469 "OMP internal variable has different type than requested");
8485 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8492Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8493 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8494 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8495 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8498Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8503 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8505 return SizePtrToInt;
8510 std::string VarName) {
8514 M, MaptypesArrayInit->
getType(),
8518 return MaptypesArrayGlobal;
8521void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8522 InsertPointTy AllocaIP,
8523 unsigned NumOperands,
8524 struct MapperAllocas &MapperAllocas) {
8525 if (!updateToLocation(
Loc))
8530 Builder.restoreIP(AllocaIP);
8532 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8536 ArrI64Ty,
nullptr,
".offload_sizes");
8537 updateToLocation(
Loc);
8538 MapperAllocas.ArgsBase = ArgsBase;
8539 MapperAllocas.Args =
Args;
8540 MapperAllocas.ArgSizes = ArgSizes;
8543void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8546 struct MapperAllocas &MapperAllocas,
8547 int64_t DeviceID,
unsigned NumOperands) {
8548 if (!updateToLocation(
Loc))
8553 Value *ArgsBaseGEP =
8554 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8555 {Builder.getInt32(0), Builder.getInt32(0)});
8557 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8558 {Builder.getInt32(0), Builder.getInt32(0)});
8559 Value *ArgSizesGEP =
8560 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8561 {Builder.getInt32(0), Builder.getInt32(0)});
8564 Builder.CreateCall(MapperFunc,
8565 {SrcLocInfo, Builder.getInt64(DeviceID),
8566 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
8567 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
8570void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8571 TargetDataRTArgs &RTArgs,
8572 TargetDataInfo &
Info,
8574 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8575 "expected region end call to runtime only when end call is separate");
8577 auto VoidPtrTy = UnqualPtrTy;
8578 auto VoidPtrPtrTy = UnqualPtrTy;
8580 auto Int64PtrTy = UnqualPtrTy;
8582 if (!
Info.NumberOfPtrs) {
8592 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8594 Info.RTArgs.BasePointersArray,
8596 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8600 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8603 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8605 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8606 :
Info.RTArgs.MapTypesArray,
8612 if (!
Info.EmitDebug)
8615 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8621 if (!
Info.HasMapper)
8624 RTArgs.MappersArray =
8625 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
8628void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8629 InsertPointTy CodeGenIP,
8630 MapInfosTy &CombinedInfo,
8631 TargetDataInfo &
Info) {
8632 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8633 CombinedInfo.NonContigInfo;
8646 "struct.descriptor_dim");
8648 enum { OffsetFD = 0, CountFD, StrideFD };
8652 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
8655 if (NonContigInfo.Dims[
I] == 1)
8657 Builder.restoreIP(AllocaIP);
8660 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
8661 Builder.restoreIP(CodeGenIP);
8662 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
8663 unsigned RevIdx = EE -
II - 1;
8664 Value *DimsLVal = Builder.CreateInBoundsGEP(
8666 {Builder.getInt64(0), Builder.getInt64(II)});
8668 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
8669 Builder.CreateAlignedStore(
8670 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
8671 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
8673 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
8674 Builder.CreateAlignedStore(
8675 NonContigInfo.Counts[L][RevIdx], CountLVal,
8676 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8678 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
8679 Builder.CreateAlignedStore(
8680 NonContigInfo.Strides[L][RevIdx], StrideLVal,
8681 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8684 Builder.restoreIP(CodeGenIP);
8685 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
8686 DimsAddr, Builder.getPtrTy());
8687 Value *
P = Builder.CreateConstInBoundsGEP2_32(
8689 Info.RTArgs.PointersArray, 0,
I);
8690 Builder.CreateAlignedStore(
8691 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
8696void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8704 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
8706 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8707 Value *DeleteBit = Builder.CreateAnd(
8710 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8711 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8716 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8718 Value *PtrAndObjBit = Builder.CreateAnd(
8721 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8722 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8723 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8724 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8725 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8726 DeleteCond = Builder.CreateIsNull(
8728 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8731 DeleteCond = Builder.CreateIsNotNull(
8733 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8735 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8736 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8738 emitBlock(BodyBB, MapperFn);
8741 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
8744 Value *MapTypeArg = Builder.CreateAnd(
8747 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8748 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8749 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8750 MapTypeArg = Builder.CreateOr(
8753 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8754 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8758 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8759 ArraySize, MapTypeArg, MapName};
8761 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8769 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
8785 MapperFn->
addFnAttr(Attribute::NoInline);
8786 MapperFn->
addFnAttr(Attribute::NoUnwind);
8796 auto SavedIP = Builder.saveIP();
8797 Builder.SetInsertPoint(EntryBB);
8809 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
8810 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
8811 Value *PtrBegin = BeginIn;
8812 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
8817 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8818 MapType, MapName, ElementSize, HeadBB,
8824 emitBlock(HeadBB, MapperFn);
8829 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
8830 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8833 emitBlock(BodyBB, MapperFn);
8836 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
8840 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
8842 return Info.takeError();
8846 Value *OffloadingArgs[] = {MapperHandle};
8847 Value *PreviousSize = Builder.CreateCall(
8848 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
8850 Value *ShiftedPreviousSize =
8851 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
8854 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
8863 Value *OriMapType = Builder.getInt64(
8864 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8866 Value *MemberMapType =
8867 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8881 Value *LeftToFrom = Builder.CreateAnd(
8884 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8885 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8886 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8895 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
8896 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
8898 emitBlock(AllocBB, MapperFn);
8899 Value *AllocMapType = Builder.CreateAnd(
8902 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8903 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8904 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8905 Builder.CreateBr(EndBB);
8906 emitBlock(AllocElseBB, MapperFn);
8907 Value *IsTo = Builder.CreateICmpEQ(
8910 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8911 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8912 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
8914 emitBlock(ToBB, MapperFn);
8915 Value *ToMapType = Builder.CreateAnd(
8918 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8919 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8920 Builder.CreateBr(EndBB);
8921 emitBlock(ToElseBB, MapperFn);
8922 Value *IsFrom = Builder.CreateICmpEQ(
8925 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8926 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8927 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
8929 emitBlock(FromBB, MapperFn);
8930 Value *FromMapType = Builder.CreateAnd(
8933 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8934 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8936 emitBlock(EndBB, MapperFn);
8939 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
8945 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8946 CurSizeArg, CurMapType, CurNameArg};
8948 auto ChildMapperFn = CustomMapperCB(
I);
8950 return ChildMapperFn.takeError();
8951 if (*ChildMapperFn) {
8953 Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow();
8958 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8965 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
8966 "omp.arraymap.next");
8968 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
8970 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
8972 emitBlock(ExitBB, MapperFn);
8975 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8976 MapType, MapName, ElementSize, DoneBB,
8980 emitBlock(DoneBB, MapperFn,
true);
8982 Builder.CreateRetVoid();
8983 Builder.restoreIP(SavedIP);
8987Error OpenMPIRBuilder::emitOffloadingArrays(
8988 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
8989 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
8990 bool IsNonContiguous,
8994 Info.clearArrayInfo();
8995 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8997 if (
Info.NumberOfPtrs == 0)
9000 Builder.restoreIP(AllocaIP);
9006 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9007 PointerArrayType,
nullptr,
".offload_baseptrs");
9009 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9010 PointerArrayType,
nullptr,
".offload_ptrs");
9011 AllocaInst *MappersArray = Builder.CreateAlloca(
9012 PointerArrayType,
nullptr,
".offload_mappers");
9013 Info.RTArgs.MappersArray = MappersArray;
9020 ConstantInt::get(Int64Ty, 0));
9022 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9025 if (IsNonContiguous &&
9026 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9027 CombinedInfo.Types[
I] &
9028 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9030 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9036 RuntimeSizes.set(
I);
9039 if (RuntimeSizes.all()) {
9041 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9042 SizeArrayType,
nullptr,
".offload_sizes");
9047 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9048 auto *SizesArrayGbl =
9053 if (!RuntimeSizes.any()) {
9054 Info.RTArgs.SizesArray = SizesArrayGbl;
9056 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9057 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9060 SizeArrayType,
nullptr,
".offload_sizes");
9063 Builder.CreateMemCpy(
9064 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9065 SizesArrayGbl, OffloadSizeAlign,
9070 Info.RTArgs.SizesArray = Buffer;
9078 for (
auto mapFlag : CombinedInfo.Types)
9080 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9082 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9083 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9084 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9087 if (!CombinedInfo.Names.empty()) {
9088 auto *MapNamesArrayGbl = createOffloadMapnames(
9089 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9090 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9091 Info.EmitDebug =
true;
9093 Info.RTArgs.MapNamesArray =
9095 Info.EmitDebug =
false;
9100 if (
Info.separateBeginEndCalls()) {
9101 bool EndMapTypesDiffer =
false;
9103 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9104 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9105 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9106 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9107 EndMapTypesDiffer =
true;
9110 if (EndMapTypesDiffer) {
9111 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9112 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9117 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9118 Value *BPVal = CombinedInfo.BasePointers[
I];
9119 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9122 Builder.CreateAlignedStore(BPVal, BP,
9123 M.getDataLayout().getPrefTypeAlign(PtrTy));
9125 if (
Info.requiresDevicePointerInfo()) {
9126 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9127 CodeGenIP = Builder.saveIP();
9128 Builder.restoreIP(AllocaIP);
9129 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9130 Builder.restoreIP(CodeGenIP);
9132 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9133 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9134 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9136 DeviceAddrCB(
I, BP);
9140 Value *PVal = CombinedInfo.Pointers[
I];
9141 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9145 Builder.CreateAlignedStore(PVal,
P,
9146 M.getDataLayout().getPrefTypeAlign(PtrTy));
9148 if (RuntimeSizes.test(
I)) {
9149 Value *S = Builder.CreateConstInBoundsGEP2_32(
9153 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9156 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9159 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9162 auto CustomMFunc = CustomMapperCB(
I);
9164 return CustomMFunc.takeError();
9166 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9168 Value *MAddr = Builder.CreateInBoundsGEP(
9170 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9171 Builder.CreateAlignedStore(
9172 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9175 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9176 Info.NumberOfPtrs == 0)
9178 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9183 BasicBlock *CurBB = Builder.GetInsertBlock();
9190 Builder.CreateBr(
Target);
9193 Builder.ClearInsertionPoint();
9198 BasicBlock *CurBB = Builder.GetInsertBlock();
9214 Builder.SetInsertPoint(BB);
9217Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9218 BodyGenCallbackTy ElseGen,
9219 InsertPointTy AllocaIP) {
9223 auto CondConstant = CI->getSExtValue();
9225 return ThenGen(AllocaIP, Builder.saveIP());
9227 return ElseGen(AllocaIP, Builder.saveIP());
9237 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9239 emitBlock(ThenBlock, CurFn);
9240 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9242 emitBranch(ContBlock);
9245 emitBlock(ElseBlock, CurFn);
9246 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9249 emitBranch(ContBlock);
9251 emitBlock(ContBlock, CurFn,
true);
9255bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9259 "Unexpected Atomic Ordering.");
9316OpenMPIRBuilder::InsertPointTy
9317OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9318 AtomicOpValue &
X, AtomicOpValue &V,
9320 if (!updateToLocation(
Loc))
9323 assert(
X.Var->getType()->isPointerTy() &&
9324 "OMP Atomic expects a pointer to target memory");
9325 Type *XElemTy =
X.ElemTy;
9328 "OMP atomic read expected a scalar type");
9330 Value *XRead =
nullptr;
9334 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9340 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9343 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9344 OpenMPIRBuilder::AtomicInfo atomicInfo(
9345 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9346 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9347 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9348 XRead = AtomicLoadRes.first;
9355 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9358 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9360 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9363 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9364 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9365 return Builder.saveIP();
9368OpenMPIRBuilder::InsertPointTy
9369OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9370 AtomicOpValue &
X,
Value *Expr,
9372 if (!updateToLocation(
Loc))
9375 assert(
X.Var->getType()->isPointerTy() &&
9376 "OMP Atomic expects a pointer to target memory");
9377 Type *XElemTy =
X.ElemTy;
9380 "OMP atomic write expected a scalar type");
9383 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9386 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9388 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9389 OpenMPIRBuilder::AtomicInfo atomicInfo(
9390 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9391 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9392 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9399 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9400 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9404 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9405 return Builder.saveIP();
9408OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9409 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9411 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9412 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9414 if (!updateToLocation(
Loc))
9418 Type *XTy =
X.Var->getType();
9420 "OMP Atomic expects a pointer to target memory");
9421 Type *XElemTy =
X.ElemTy;
9424 "OMP atomic update expected a scalar type");
9427 "OpenMP atomic does not support LT or GT operations");
9431 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9432 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9434 return AtomicResult.takeError();
9435 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9436 return Builder.saveIP();
9440Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9444 return Builder.CreateAdd(Src1, Src2);
9446 return Builder.CreateSub(Src1, Src2);
9448 return Builder.CreateAnd(Src1, Src2);
9450 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9452 return Builder.CreateOr(Src1, Src2);
9454 return Builder.CreateXor(Src1, Src2);
9479 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9480 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9483 bool emitRMWOp =
false;
9491 emitRMWOp = XElemTy;
9494 emitRMWOp = (IsXBinopExpr && XElemTy);
9501 std::pair<Value *, Value *> Res;
9506 if (IsIgnoreDenormalMode)
9507 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9509 if (!IsFineGrainedMemory)
9510 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9512 if (!IsRemoteMemory)
9516 Res.first = RMWInst;
9521 Res.second = Res.first;
9523 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9527 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9533 OpenMPIRBuilder::AtomicInfo atomicInfo(
9534 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9535 OldVal->
getAlign(),
true , AllocaIP,
X);
9536 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9537 BasicBlock *CurBB = Builder.GetInsertBlock();
9539 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9543 X->getName() +
".atomic.cont");
9545 Builder.restoreIP(AllocaIP);
9546 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9547 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9548 Builder.SetInsertPoint(ContBB);
9550 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9555 Value *Upd = *CBResult;
9556 Builder.CreateStore(Upd, NewAtomicAddr);
9559 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9560 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9562 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9563 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9565 Res.first = OldExprVal;
9571 Builder.SetInsertPoint(ExitBB);
9573 Builder.SetInsertPoint(ExitTI);
9579 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9586 BasicBlock *CurBB = Builder.GetInsertBlock();
9588 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9592 X->getName() +
".atomic.cont");
9594 Builder.restoreIP(AllocaIP);
9595 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9596 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9597 Builder.SetInsertPoint(ContBB);
9599 PHI->addIncoming(OldVal, CurBB);
9604 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9605 X->getName() +
".atomic.fltCast");
9607 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9608 X->getName() +
".atomic.ptrCast");
9615 Value *Upd = *CBResult;
9616 Builder.CreateStore(Upd, NewAtomicAddr);
9617 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9622 Result->setVolatile(VolatileX);
9623 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9624 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9625 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9626 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9628 Res.first = OldExprVal;
9635 Builder.SetInsertPoint(ExitBB);
9637 Builder.SetInsertPoint(ExitTI);
9644OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9645 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9648 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9649 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9650 if (!updateToLocation(
Loc))
9654 Type *XTy =
X.Var->getType();
9656 "OMP Atomic expects a pointer to target memory");
9657 Type *XElemTy =
X.ElemTy;
9660 "OMP atomic capture expected a scalar type");
9662 "OpenMP atomic does not support LT or GT operations");
9669 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9670 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9673 Value *CapturedVal =
9674 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9675 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9677 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9678 return Builder.saveIP();
9681OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9682 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9688 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9689 IsPostfixUpdate, IsFailOnly, Failure);
9692OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9693 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9698 if (!updateToLocation(
Loc))
9701 assert(
X.Var->getType()->isPointerTy() &&
9702 "OMP atomic expects a pointer to target memory");
9705 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9706 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9709 bool IsInteger =
E->getType()->isIntegerTy();
9711 if (
Op == OMPAtomicCompareOp::EQ) {
9716 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9717 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9722 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9726 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9728 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9730 "OldValue and V must be of same type");
9731 if (IsPostfixUpdate) {
9732 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9734 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9745 BasicBlock *CurBB = Builder.GetInsertBlock();
9747 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9749 CurBBTI,
X.Var->getName() +
".atomic.exit");
9755 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9757 Builder.SetInsertPoint(ContBB);
9758 Builder.CreateStore(OldValue, V.Var);
9759 Builder.CreateBr(ExitBB);
9764 Builder.SetInsertPoint(ExitBB);
9766 Builder.SetInsertPoint(ExitTI);
9769 Value *CapturedValue =
9770 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
9771 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9777 assert(
R.Var->getType()->isPointerTy() &&
9778 "r.var must be of pointer type");
9779 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9781 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9782 Value *ResultCast =
R.IsSigned
9783 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
9784 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
9785 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
9788 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9789 "Op should be either max or min at this point");
9790 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9828 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
9830 Value *CapturedValue =
nullptr;
9831 if (IsPostfixUpdate) {
9832 CapturedValue = OldValue;
9857 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
9858 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
9860 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9864 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
9866 return Builder.saveIP();
9869OpenMPIRBuilder::InsertPointOrErrorTy
9870OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
9871 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
9874 if (!updateToLocation(
Loc))
9875 return InsertPointTy();
9878 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
9879 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
9884 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
9885 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
9886 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
9906 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
9907 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
9909 splitBB(Builder,
true,
"teams.alloca");
9911 bool SubClausesPresent =
9912 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9914 if (!Config.isTargetDevice() && SubClausesPresent) {
9915 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9916 "if lowerbound is non-null, then upperbound must also be non-null "
9917 "for bounds on num_teams");
9919 if (NumTeamsUpper ==
nullptr)
9920 NumTeamsUpper = Builder.getInt32(0);
9922 if (NumTeamsLower ==
nullptr)
9923 NumTeamsLower = NumTeamsUpper;
9927 "argument to if clause must be an integer value");
9931 IfExpr = Builder.CreateICmpNE(IfExpr,
9932 ConstantInt::get(IfExpr->
getType(), 0));
9933 NumTeamsUpper = Builder.CreateSelect(
9934 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
9937 NumTeamsLower = Builder.CreateSelect(
9938 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
9941 if (ThreadLimit ==
nullptr)
9942 ThreadLimit = Builder.getInt32(0);
9944 Value *ThreadNum = getOrCreateThreadID(Ident);
9946 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
9947 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9950 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
9951 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
9952 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9956 OI.EntryBB = AllocaBB;
9958 OI.OuterAllocaBB = &OuterAllocaBB;
9962 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
9964 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9966 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9968 auto HostPostOutlineCB = [
this, Ident,
9969 ToBeDeleted](
Function &OutlinedFn)
mutable {
9974 "there must be a single user for the outlined function");
9979 "Outlined function must have two or three arguments only");
9981 bool HasShared = OutlinedFn.
arg_size() == 3;
9989 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9990 "outlined function.");
9991 Builder.SetInsertPoint(StaleCI);
9993 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
9996 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
9997 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10001 I->eraseFromParent();
10004 if (!Config.isTargetDevice())
10005 OI.PostOutlineCB = HostPostOutlineCB;
10007 addOutlineInfo(std::move(OI));
10009 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10011 return Builder.saveIP();
10014OpenMPIRBuilder::InsertPointOrErrorTy
10015OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10016 InsertPointTy OuterAllocaIP,
10017 BodyGenCallbackTy BodyGenCB) {
10018 if (!updateToLocation(
Loc))
10019 return InsertPointTy();
10021 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10023 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10025 splitBB(Builder,
true,
"distribute.entry");
10026 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10029 splitBB(Builder,
true,
"distribute.exit");
10031 splitBB(Builder,
true,
"distribute.body");
10033 splitBB(Builder,
true,
"distribute.alloca");
10036 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10037 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10038 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10043 if (Config.isTargetDevice()) {
10045 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10046 OI.EntryBB = AllocaBB;
10047 OI.ExitBB = ExitBB;
10049 addOutlineInfo(std::move(OI));
10051 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10053 return Builder.saveIP();
10058 std::string VarName) {
10064 M, MapNamesArrayInit->
getType(),
10067 return MapNamesArrayGlobal;
10072void OpenMPIRBuilder::initializeTypes(
Module &M) {
10075 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10076 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
10077#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10078#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10079 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10080 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10081#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10082 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10083 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
10084#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10085 T = StructType::getTypeByName(Ctx, StructName); \
10087 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10089 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10090#include "llvm/Frontend/OpenMP/OMPKinds.def"
10093void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10101 while (!Worklist.
empty()) {
10105 if (
BlockSet.insert(SuccBB).second)
10114 if (!Config.isGPU()) {
10129 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10130 Fn->
addFnAttr(Attribute::MustProgress);
10134void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10135 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10138 if (OffloadInfoManager.empty())
10142 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10143 TargetRegionEntryInfo>,
10145 OrderedEntries(OffloadInfoManager.size());
10148 auto &&GetMDInt = [
this](
unsigned V) {
10155 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10156 auto &&TargetRegionMetadataEmitter =
10157 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10158 const TargetRegionEntryInfo &EntryInfo,
10159 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10172 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10173 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10174 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10175 GetMDInt(
E.getOrder())};
10178 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10184 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10187 auto &&DeviceGlobalVarMetadataEmitter =
10188 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10190 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10198 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10199 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10202 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10203 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10209 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10210 DeviceGlobalVarMetadataEmitter);
10212 for (
const auto &
E : OrderedEntries) {
10213 assert(
E.first &&
"All ordered entries must exist!");
10214 if (
const auto *CE =
10217 if (!
CE->getID() || !
CE->getAddress()) {
10219 TargetRegionEntryInfo EntryInfo =
E.second;
10220 StringRef FnName = EntryInfo.ParentName;
10221 if (!M.getNamedValue(FnName))
10223 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10226 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10229 }
else if (
const auto *CE =
dyn_cast<
10230 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10232 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10233 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10236 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10237 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10238 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10240 if (!
CE->getAddress()) {
10241 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10245 if (
CE->getVarSize() == 0)
10248 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10249 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10250 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10251 "Declaret target link address is set.");
10252 if (Config.isTargetDevice())
10254 if (!
CE->getAddress()) {
10255 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10267 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10268 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10273 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10274 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10275 Flags,
CE->getLinkage(),
CE->getVarName());
10277 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10278 Flags,
CE->getLinkage());
10289 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10294 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10295 Config.getRequiresFlags());
10298void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10300 unsigned FileID,
unsigned Line,
unsigned Count) {
10302 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10303 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10305 OS <<
"_" <<
Count;
10308void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10310 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10311 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10312 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10313 EntryInfo.Line, NewCount);
10316TargetRegionEntryInfo
10317OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10321 auto FileIDInfo = CallBack();
10325 FileID =
Status->getUniqueID().getFile();
10329 FileID =
hash_value(std::get<0>(FileIDInfo));
10332 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10333 std::get<1>(FileIDInfo));
10336unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10339 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10341 !(Remain & 1); Remain = Remain >> 1)
10347OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10350 << getFlagMemberOffset());
10353void OpenMPIRBuilder::setCorrectMemberOfFlag(
10359 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10361 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10368 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10369 Flags |= MemberOfFlag;
10372Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10373 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10374 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10375 bool IsDeclaration,
bool IsExternallyVisible,
10376 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10377 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10378 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10379 std::function<
Constant *()> GlobalInitializer,
10386 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10387 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10389 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10390 Config.hasRequiresUnifiedSharedMemory())) {
10395 if (!IsExternallyVisible)
10396 OS <<
format(
"_%x", EntryInfo.FileID);
10397 OS <<
"_decl_tgt_ref_ptr";
10400 Value *
Ptr = M.getNamedValue(PtrName);
10404 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10409 if (!Config.isTargetDevice()) {
10410 if (GlobalInitializer)
10411 GV->setInitializer(GlobalInitializer());
10416 registerTargetGlobalVariable(
10417 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10418 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10428void OpenMPIRBuilder::registerTargetGlobalVariable(
10429 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10430 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10431 bool IsDeclaration,
bool IsExternallyVisible,
10432 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10433 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10434 std::vector<Triple> TargetTriple,
10435 std::function<
Constant *()> GlobalInitializer,
10438 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10439 (TargetTriple.empty() && !Config.isTargetDevice()))
10442 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10447 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10449 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10450 !Config.hasRequiresUnifiedSharedMemory()) {
10451 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10455 if (!IsDeclaration)
10457 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10464 if (Config.isTargetDevice() &&
10468 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10471 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10473 if (!M.getNamedValue(RefName)) {
10475 getOrCreateInternalVariable(Addr->
getType(), RefName);
10477 GvAddrRef->setConstant(
true);
10479 GvAddrRef->setInitializer(Addr);
10480 GeneratedRefs.push_back(GvAddrRef);
10484 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10485 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10487 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10489 if (Config.isTargetDevice()) {
10493 Addr = getAddrOfDeclareTargetVar(
10494 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10495 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10496 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10499 VarSize = M.getDataLayout().getPointerSize();
10503 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10509void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10513 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10518 auto &&GetMDInt = [MN](
unsigned Idx) {
10523 auto &&GetMDString = [MN](
unsigned Idx) {
10525 return V->getString();
10528 switch (GetMDInt(0)) {
10532 case OffloadEntriesInfoManager::OffloadEntryInfo::
10533 OffloadingEntryInfoTargetRegion: {
10534 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10539 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10543 case OffloadEntriesInfoManager::OffloadEntryInfo::
10544 OffloadingEntryInfoDeviceGlobalVar:
10545 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10547 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10557 if (HostFilePath.
empty())
10561 if (std::error_code Err = Buf.getError()) {
10563 "OpenMPIRBuilder: " +
10571 if (std::error_code Err = M.getError()) {
10573 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10577 loadOffloadInfoMetadata(*M.get());
10584bool OffloadEntriesInfoManager::empty()
const {
10585 return OffloadEntriesTargetRegion.empty() &&
10586 OffloadEntriesDeviceGlobalVar.empty();
10589unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10590 const TargetRegionEntryInfo &EntryInfo)
const {
10591 auto It = OffloadEntriesTargetRegionCount.find(
10592 getTargetRegionEntryCountKey(EntryInfo));
10593 if (It == OffloadEntriesTargetRegionCount.end())
10598void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10599 const TargetRegionEntryInfo &EntryInfo) {
10600 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10601 EntryInfo.Count + 1;
10605void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10606 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10607 OffloadEntriesTargetRegion[EntryInfo] =
10608 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10609 OMPTargetRegionEntryTargetRegion);
10610 ++OffloadingEntriesNum;
10613void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10615 OMPTargetRegionEntryKind Flags) {
10616 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10619 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10623 if (OMPBuilder->Config.isTargetDevice()) {
10625 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10628 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10629 Entry.setAddress(Addr);
10631 Entry.setFlags(Flags);
10633 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10634 hasTargetRegionEntryInfo(EntryInfo,
true))
10636 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10637 "Target region entry already registered!");
10638 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10639 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10640 ++OffloadingEntriesNum;
10642 incrementTargetRegionEntryInfoCount(EntryInfo);
10645bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10646 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10649 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10651 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10652 if (It == OffloadEntriesTargetRegion.end()) {
10656 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10661void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10662 const OffloadTargetRegionEntryInfoActTy &Action) {
10664 for (
const auto &It : OffloadEntriesTargetRegion) {
10665 Action(It.first, It.second);
10669void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10670 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10671 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10672 ++OffloadingEntriesNum;
10675void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10678 if (OMPBuilder->Config.isTargetDevice()) {
10680 if (!hasDeviceGlobalVarEntryInfo(VarName))
10682 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10683 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10684 if (Entry.getVarSize() == 0) {
10685 Entry.setVarSize(VarSize);
10690 Entry.setVarSize(VarSize);
10692 Entry.setAddress(Addr);
10694 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10695 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10696 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10697 "Entry not initialized!");
10698 if (Entry.getVarSize() == 0) {
10699 Entry.setVarSize(VarSize);
10704 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10705 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10706 Addr, VarSize, Flags,
Linkage,
10709 OffloadEntriesDeviceGlobalVar.try_emplace(
10710 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10711 ++OffloadingEntriesNum;
10715void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10716 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10718 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
10719 Action(
E.getKey(),
E.getValue());
10726void CanonicalLoopInfo::collectControlBlocks(
10733 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10736BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10745void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10757void CanonicalLoopInfo::mapIndVar(
10767 for (
Use &U : OldIV->
uses()) {
10771 if (
User->getParent() == getCond())
10773 if (
User->getParent() == getLatch())
10779 Value *NewIV = Updater(OldIV);
10782 for (
Use *U : ReplacableUses)
10790void CanonicalLoopInfo::assertOK()
const {
10803 "Preheader must terminate with unconditional branch");
10805 "Preheader must jump to header");
10809 "Header must terminate with unconditional branch");
10810 assert(Header->getSingleSuccessor() ==
Cond &&
10811 "Header must jump to exiting block");
10814 assert(
Cond->getSinglePredecessor() == Header &&
10815 "Exiting block only reachable from header");
10818 "Exiting block must terminate with conditional branch");
10820 "Exiting block must have two successors");
10822 "Exiting block's first successor jump to the body");
10824 "Exiting block's second successor must exit the loop");
10828 "Body only reachable from exiting block");
10833 "Latch must terminate with unconditional branch");
10842 "Exit block must terminate with unconditional branch");
10843 assert(
Exit->getSingleSuccessor() == After &&
10844 "Exit block must jump to after block");
10848 "After block only reachable from exit block");
10852 assert(IndVar &&
"Canonical induction variable not found?");
10854 "Induction variable must be an integer");
10856 "Induction variable must be a PHI in the loop header");
10862 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
10869 Value *TripCount = getTripCount();
10870 assert(TripCount &&
"Loop trip count not found?");
10872 "Trip count and induction variable must have the same type");
10876 "Exit condition must be a signed less-than comparison");
10878 "Exit condition must compare the induction variable");
10880 "Exit condition must compare with the trip count");
10884void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...