24#define DEBUG_TYPE "lower-mem-intrinsics"
35 Value *OpSize,
unsigned OpSizeVal) {
38 return B.CreateAnd(Len, OpSizeVal - 1);
39 return B.CreateURem(Len, OpSize);
48 Value *RTLoopRemainder =
nullptr) {
51 return B.CreateSub(Len, RTLoopRemainder);
56struct LoopExpansionInfo {
61 Value *MainLoopIndex =
nullptr;
69 Value *ResidualLoopIndex =
nullptr;
72std::optional<uint64_t> getAverageMemOpLoopTripCount(
const MemIntrinsic &
I) {
75 if (std::optional<Function::ProfileCount> EC =
76 I.getFunction()->getEntryCount();
77 !EC || !
EC->getCount())
79 if (
const auto Len =
I.getLengthInBytes())
80 return Len->getZExtValue();
84 std::numeric_limits<uint32_t>::max(),
Total);
88 for (
const auto &
P : ProfData)
89 TripCount +=
P.Count *
P.Value;
90 return std::round(1.0 * TripCount /
Total);
130static LoopExpansionInfo
132 unsigned MainLoopStep,
unsigned ResidualLoopStep,
134 std::optional<uint64_t> ExpectedUnits) {
135 assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
136 "ResidualLoopStep must divide MainLoopStep if specified");
137 assert(ResidualLoopStep <= MainLoopStep &&
138 "ResidualLoopStep cannot be larger than MainLoopStep");
139 assert(MainLoopStep > 0 &&
"MainLoopStep must be non-zero");
140 LoopExpansionInfo LEI;
149 InsertBefore, BBNamePrefix +
"-post-expansion");
158 Type *LenType = Len->getType();
160 ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);
161 ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
169 bool MustTakeMainLoop =
false;
170 bool MayTakeMainLoop =
true;
171 bool MustTakeResidualLoop =
false;
172 bool MayTakeResidualLoop =
true;
174 Value *LoopUnits = Len;
175 Value *ResidualUnits =
nullptr;
176 if (MainLoopStep != 1) {
178 uint64_t TotalUnits = CLen->getZExtValue();
180 uint64_t ResidualCount = TotalUnits - LoopEndCount;
181 LoopUnits = ConstantInt::get(LenType, LoopEndCount);
182 ResidualUnits = ConstantInt::get(LenType, ResidualCount);
183 MustTakeMainLoop = LoopEndCount > 0;
184 MayTakeMainLoop = MustTakeMainLoop;
185 MustTakeResidualLoop = ResidualCount > 0;
186 MayTakeResidualLoop = MustTakeResidualLoop;
192 CIMainLoopStep, MainLoopStep);
194 MainLoopStep, ResidualUnits);
197 MustTakeMainLoop = CLen->getZExtValue() > 0;
198 MayTakeMainLoop = MustTakeMainLoop;
203 assert((MayTakeMainLoop || MayTakeResidualLoop) &&
204 "At least one of the loops must be generated");
210 if (MayTakeMainLoop) {
212 ParentFunc, PostLoopBB);
217 LEI.MainLoopIndex = LoopIndex;
218 LoopIndex->
addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
221 LoopIndex, ConstantInt::get(LenType, MainLoopStep));
231 LoopBuilder.
CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
233 if (ExpectedUnits.has_value()) {
234 uint64_t BackedgeTakenCount = ExpectedUnits.value() / MainLoopStep;
235 if (BackedgeTakenCount > 0)
236 BackedgeTakenCount -= 1;
247 bool ResidualLoopRequested =
248 ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep;
251 if (ResidualLoopRequested && MayTakeResidualLoop) {
265 if (MustTakeResidualLoop) {
267 PredOfResLoopBody = MainLoopBB;
279 ResidualLoopBB, PostLoopBB);
280 if (ExpectedUnits.has_value()) {
282 BR->setMetadata(LLVMContext::MD_prof,
289 PredOfResLoopBody = ResidualCondBB;
296 ResBuilder.
CreatePHI(LenType, 2,
"residual-loop-index");
297 ResidualIndex->
addIncoming(Zero, PredOfResLoopBody);
303 LEI.ResidualLoopIndex = ResBuilder.
CreateAdd(LoopUnits, ResidualIndex);
305 LEI.ResidualLoopIndex = ResidualIndex;
308 ResidualIndex, ConstantInt::get(LenType, ResidualLoopStep));
309 ResidualIndex->
addIncoming(ResNewIndex, ResidualLoopBB);
317 ResBuilder.
CreateICmpULT(ResNewIndex, ResidualUnits), ResidualLoopBB,
320 if (ExpectedUnits.has_value()) {
322 (ExpectedUnits.value() % MainLoopStep) / ResidualLoopStep;
323 if (BackedgeTakenCount > 0)
324 BackedgeTakenCount -= 1;
334 if (MustTakeMainLoop) {
338 PreLoopBuilder.
CreateBr(MainLoopBB);
339 }
else if (!MainLoopBB && ResidualLoopBB) {
340 if (MustTakeResidualLoop) {
343 PreLoopBuilder.
CreateBr(ResidualLoopBB);
349 PreLoopBuilder.
CreateICmpNE(ResidualUnits, Zero), ResidualLoopBB,
351 if (ExpectedUnits.has_value()) {
362 if (ResidualCondBB) {
365 FalseBB = ResidualCondBB;
366 }
else if (ResidualLoopBB) {
370 assert(MustTakeResidualLoop);
371 FalseBB = ResidualLoopBB;
375 PreLoopBuilder.
CreateICmpNE(LoopUnits, Zero), MainLoopBB, FalseBB);
377 if (ExpectedUnits.has_value()) {
393 bool SrcIsVolatile,
bool DstIsVolatile,
396 std::optional<uint32_t> AtomicElementSize,
397 std::optional<uint64_t> AverageTripCount) {
415 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(
416 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
418 "Atomic memcpy lowering is not supported for vector operand type");
421 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
422 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
423 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
424 "Atomic memcpy lowering is not supported for selected operand size");
430 if (LoopEndCount != 0) {
431 LoopExpansionInfo LEI =
433 "static-memcpy", AverageTripCount);
434 assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
435 "Main loop should be generated for non-zero loop count");
449 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
452 Load->setMetadata(LLVMContext::MD_alias_scope,
458 Load, DstGEP, PartDstAlign, DstIsVolatile);
461 Store->setMetadata(LLVMContext::MD_noalias,
MDNode::get(Ctx, NewScope));
463 if (AtomicElementSize) {
467 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
468 "No residual loop was requested");
472 uint64_t BytesCopied = LoopEndCount;
474 if (RemainingBytes == 0)
479 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
480 SrcAS, DstAS, SrcAlign, DstAlign,
483 for (
auto *OpTy : RemainingOps) {
487 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
488 assert((!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
489 "Atomic memcpy lowering is not supported for selected operand size");
492 Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
497 Load->setMetadata(LLVMContext::MD_alias_scope,
501 Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
506 Store->setMetadata(LLVMContext::MD_noalias,
MDNode::get(Ctx, NewScope));
508 if (AtomicElementSize) {
512 BytesCopied += OperandSize;
515 "Bytes copied should match size in the call!");
520 Align SrcAlign,
Align DstAlign,
bool SrcIsVolatile,
bool DstIsVolatile,
522 std::optional<uint32_t> AtomicElementSize,
523 std::optional<uint64_t> AverageTripCount) {
529 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(
"MemCopyDomain");
531 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
536 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(
537 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
539 "Atomic memcpy lowering is not supported for vector operand type");
540 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
541 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
542 "Atomic memcpy lowering is not supported for selected operand size");
546 Type *ResidualLoopOpType = AtomicElementSize
549 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
550 assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
551 "Store size is expected to match type size");
553 LoopExpansionInfo LEI =
555 "dynamic-memcpy", AverageTripCount);
556 assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
557 "Main loop should be generated for unknown size copy");
569 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
570 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
571 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
574 Load->setMetadata(LLVMContext::MD_alias_scope,
MDNode::get(Ctx, NewScope));
577 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
579 Load, DstGEP, PartDstAlign, DstIsVolatile);
584 if (AtomicElementSize) {
590 if (!LEI.ResidualLoopIP)
597 Value *ResSrcGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
598 LEI.ResidualLoopIndex);
599 LoadInst *ResLoad = ResLoopBuilder.CreateAlignedLoad(
600 ResidualLoopOpType, ResSrcGEP, ResSrcAlign, SrcIsVolatile);
606 Value *ResDstGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
607 LEI.ResidualLoopIndex);
608 StoreInst *ResStore = ResLoopBuilder.CreateAlignedStore(
609 ResLoad, ResDstGEP, ResDstAlign, DstIsVolatile);
614 if (AtomicElementSize) {
624static std::pair<Value *, Value *>
627 Value *ResAddr1 = Addr1;
628 Value *ResAddr2 = Addr2;
633 if (
TTI.isValidAddrSpaceCast(AS2, AS1))
634 ResAddr2 =
B.CreateAddrSpaceCast(Addr2, Addr1->
getType());
635 else if (
TTI.isValidAddrSpaceCast(AS1, AS2))
636 ResAddr1 =
B.CreateAddrSpaceCast(Addr1, Addr2->
getType());
639 "support addrspacecast");
641 return {ResAddr1, ResAddr2};
673 Align DstAlign,
bool SrcIsVolatile,
684 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
686 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
688 bool LoopOpIsInt8 = LoopOpType == Int8Type;
692 bool RequiresResidual = !LoopOpIsInt8;
694 Type *ResidualLoopOpType = Int8Type;
695 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
699 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
701 ConstantInt::get(ILengthType, ResidualLoopOpSize);
702 ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
708 Value *RuntimeLoopBytes = CopyLen;
709 Value *RuntimeLoopRemainder =
nullptr;
710 Value *SkipResidualCondition =
nullptr;
711 if (RequiresResidual) {
712 RuntimeLoopRemainder =
715 LoopOpSize, RuntimeLoopRemainder);
716 SkipResidualCondition =
717 PLBuilder.
CreateICmpEQ(RuntimeLoopRemainder, Zero,
"skip_residual");
719 Value *SkipMainCondition =
720 PLBuilder.
CreateICmpEQ(RuntimeLoopBytes, Zero,
"skip_main");
731 auto [CmpSrcAddr, CmpDstAddr] =
734 PLBuilder.
CreateICmpULT(CmpSrcAddr, CmpDstAddr,
"compare_src_dst");
737 &ThenTerm, &ElseTerm);
764 CopyBackwardsBB->
setName(
"memmove_copy_backwards");
766 CopyForwardBB->
setName(
"memmove_copy_forward");
768 ExitBB->
setName(
"memmove_done");
781 F->getContext(),
"memmove_bwd_main_loop",
F, CopyForwardBB);
787 if (RequiresResidual) {
790 F->getContext(),
"memmove_bwd_residual_loop",
F, MainLoopBB);
795 ResidualLoopPhi, CIResidualLoopOpSize,
"bwd_residual_index");
803 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
808 ResidualDstAlign, DstIsVolatile);
812 F->getContext(),
"memmove_bwd_middle",
F, MainLoopBB);
818 ResidualLoopBuilder.
CreateICmpEQ(ResidualIndex, RuntimeLoopBytes),
819 IntermediateBB, ResidualLoopBB);
821 ResidualLoopPhi->
addIncoming(ResidualIndex, ResidualLoopBB);
822 ResidualLoopPhi->
addIncoming(CopyLen, CopyBackwardsBB);
828 BrInst->setDebugLoc(DbgLoc);
831 PredBB = IntermediateBB;
839 MainLoopBuilder.
CreateSub(MainLoopPhi, CILoopOpSize,
"bwd_main_index");
843 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
851 MainLoopPhi->
addIncoming(RuntimeLoopBytes, PredBB);
856 SkipMainCondition, ExitBB, MainLoopBB, PredBBTerm->
getIterator());
857 BrInst->setDebugLoc(DbgLoc);
869 MainLoopBuilder.
CreatePHI(ILengthType, 0,
"fwd_main_index");
873 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
878 Value *MainIndex = MainLoopBuilder.
CreateAdd(MainLoopPhi, CILoopOpSize);
884 if (RequiresResidual)
890 MainLoopBuilder.
CreateICmpEQ(MainIndex, RuntimeLoopBytes), SuccessorBB,
897 BrInst->setDebugLoc(DbgLoc);
900 if (RequiresResidual) {
905 F->getContext(),
"memmove_fwd_residual_loop",
F, ExitBB);
906 IntermediateBuilder.
CreateCondBr(SkipResidualCondition, ExitBB,
913 ResidualLoopBuilder.
CreatePHI(ILengthType, 0,
"fwd_residual_index");
917 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
922 ResidualDstAlign, DstIsVolatile);
923 Value *ResidualIndex =
924 ResidualLoopBuilder.
CreateAdd(ResidualLoopPhi, CIResidualLoopOpSize);
926 ResidualLoopBuilder.
CreateICmpEQ(ResidualIndex, CopyLen), ExitBB,
928 ResidualLoopPhi->
addIncoming(ResidualIndex, ResidualLoopBB);
929 ResidualLoopPhi->
addIncoming(RuntimeLoopBytes, IntermediateBB);
940 Align DstAlign,
bool SrcIsVolatile,
955 Type *LoopOpType =
TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
957 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
958 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
967 ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
968 ConstantInt *LoopBound = ConstantInt::get(ILengthType, BytesCopiedInLoop);
969 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
975 auto [CmpSrcAddr, CmpDstAddr] =
978 PLBuilder.
CreateICmpULT(CmpSrcAddr, CmpDstAddr,
"compare_src_dst");
981 &ThenTerm, &ElseTerm);
986 ExitBB->
setName(
"memmove_done");
998 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
1004 Value *SrcGEP = Builder.CreateInBoundsGEP(
1005 Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
1007 Builder.CreateAlignedLoad(OpTy, SrcGEP, ResSrcAlign, SrcIsVolatile);
1008 Value *DstGEP = Builder.CreateInBoundsGEP(
1009 Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
1010 Builder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
1011 BytesCopied += OperandSize;
1015 if (RemainingBytes != 0) {
1016 CopyBackwardsBB->
setName(
"memmove_bwd_residual");
1017 uint64_t BytesCopied = BytesCopiedInLoop;
1028 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1029 SrcAS, DstAS, PartSrcAlign,
1031 for (
auto *OpTy : RemainingOps) {
1035 GenerateResidualLdStPair(OpTy, BwdResBuilder, BytesCopied);
1038 if (BytesCopiedInLoop != 0) {
1041 if (RemainingBytes != 0) {
1045 PredBB = CopyBackwardsBB;
1047 CopyBackwardsBB->
setName(
"memmove_bwd_loop");
1052 Value *Index = LoopBuilder.
CreateSub(LoopPhi, CILoopOpSize,
"bwd_index");
1055 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
1073 if (BytesCopiedInLoop != 0) {
1074 CopyForwardBB->
setName(
"memmove_fwd_loop");
1077 if (RemainingBytes != 0) {
1080 "memmove_fwd_residual");
1081 FwdResidualBB = SuccBB;
1088 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile,
"element");
1103 if (RemainingBytes != 0) {
1104 uint64_t BytesCopied = BytesCopiedInLoop;
1111 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1112 SrcAS, DstAS, PartSrcAlign,
1114 for (
auto *OpTy : RemainingOps)
1115 GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
1123 TypeSize DstSize =
DL.getTypeStoreSize(DstType);
1125 TypeSize SetValueSize =
DL.getTypeStoreSize(SetValueType);
1126 assert(SetValueSize ==
DL.getTypeAllocSize(SetValueType) &&
1127 "Store size and alloc size of SetValue's type must match");
1128 assert(SetValueSize != 0 && DstSize % SetValueSize == 0 &&
1129 "DstType size must be a multiple of SetValue size");
1132 if (DstSize != SetValueSize) {
1141 B.CreateVectorSplat(DstSize / SetValueSize, Result,
"setvalue.splat");
1146 Result =
B.CreateBitCast(Result, DstType,
"setvalue.splat.cast");
1154 std::optional<uint64_t> AverageTripCount) {
1166 Type *TypeOfLen = Len->getType();
1170 Type *LoopOpType = Int8Type;
1174 LoopOpType =
TTI->getMemcpyLoopLoweringType(
1175 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1177 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
1178 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
1183 if (LoopEndCount != 0) {
1184 Value *SplatSetValue =
nullptr;
1194 InsertBefore, Len, LoopOpSize, 0,
"static-memset", AverageTripCount);
1195 assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
1196 "Main loop should be generated for non-zero loop count");
1208 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
1209 "No residual loop was requested");
1213 uint64_t RemainingBytes = Len->getZExtValue() - BytesSet;
1214 if (RemainingBytes == 0)
1219 assert(
TTI &&
"there cannot be a residual loop without TTI");
1221 TTI->getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
1222 DstAS, DstAS, DstAlign, DstAlign,
1225 Type *PreviousOpTy =
nullptr;
1226 Value *SplatSetValue =
nullptr;
1227 for (
auto *OpTy : RemainingOps) {
1228 TypeSize OperandSize =
DL.getTypeStoreSize(OpTy);
1230 "Operand types cannot be scalable vector types");
1235 if (OpTy != PreviousOpTy)
1239 Int8Type, DstAddr, ConstantInt::get(TypeOfLen, BytesSet));
1242 BytesSet += OperandSize;
1243 PreviousOpTy = OpTy;
1245 assert(BytesSet == Len->getZExtValue() &&
1246 "Bytes set should match size in the call!");
1253 std::optional<uint64_t> AverageTripCount) {
1264 Type *LoopOpType = Int8Type;
1266 LoopOpType =
TTI->getMemcpyLoopLoweringType(
1267 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1269 TypeSize LoopOpSize =
DL.getTypeStoreSize(LoopOpType);
1270 assert(LoopOpSize.
isFixed() &&
"LoopOpType cannot be a scalable vector type");
1272 Type *ResidualLoopOpType = Int8Type;
1273 TypeSize ResidualLoopOpSize =
DL.getTypeStoreSize(ResidualLoopOpType);
1281 LoopExpansionInfo LEI =
1283 "dynamic-memset", AverageTripCount);
1284 assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
1285 "Main loop should be generated for unknown size memset");
1297 if (!LEI.ResidualLoopIP)
1305 LEI.ResidualLoopIndex);
1314 std::optional<uint64_t> AverageTripCount) {
1329 PreferredLoopOpType =
TTI->getMemcpyLoopLoweringType(
1330 Ctx, Len, DstAS, DstAS, DstAlign, DstAlign, std::nullopt);
1332 TypeSize PreferredLoopOpStoreSize =
DL.getTypeStoreSize(PreferredLoopOpType);
1334 "PreferredLoopOpType cannot be a scalable vector type");
1336 TypeSize PreferredLoopOpAllocSize =
DL.getTypeAllocSize(PreferredLoopOpType);
1339 TypeSize OriginalTypeStoreSize =
DL.getTypeStoreSize(OriginalType);
1340 TypeSize OriginalTypeAllocSize =
DL.getTypeAllocSize(OriginalType);
1351 unsigned MainLoopStep = 1;
1352 Type *MainLoopType = OriginalType;
1353 TypeSize MainLoopAllocSize = OriginalTypeAllocSize;
1354 unsigned ResidualLoopStep = 0;
1355 Type *ResidualLoopType =
nullptr;
1357 if (PreferredLoopOpStoreSize == PreferredLoopOpAllocSize &&
1358 OriginalTypeStoreSize == OriginalTypeAllocSize &&
1359 OriginalTypeStoreSize < PreferredLoopOpStoreSize &&
1360 PreferredLoopOpStoreSize % OriginalTypeStoreSize == 0) {
1363 MainLoopStep = PreferredLoopOpStoreSize / OriginalTypeStoreSize;
1364 MainLoopType = PreferredLoopOpType;
1365 MainLoopAllocSize = PreferredLoopOpStoreSize;
1367 ResidualLoopStep = 1;
1368 ResidualLoopType = OriginalType;
1373 LoopExpansionInfo LEI =
1375 "memset.pattern", AverageTripCount);
1379 if (LEI.MainLoopIP) {
1383 if (MainLoopType != OriginalType)
1395 if (!LEI.ResidualLoopIP)
1404 LEI.ResidualLoopIndex);
1409template <
typename T>
1413 const SCEV *DestSCEV = SE->
getSCEV(Memcpy->getRawDest());
1424 auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
1464 bool DstIsVolatile = SrcIsVolatile;
1470 if (SrcAS != DstAS) {
1471 if (!
TTI.addrspacesMayAlias(SrcAS, DstAS)) {
1474 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
1477 Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1478 SrcIsVolatile, DstIsVolatile,
1479 false,
TTI, std::nullopt, AverageTripCount);
1482 Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
1483 DstAlign, SrcIsVolatile, DstIsVolatile,
1484 false,
TTI, std::nullopt, AverageTripCount);
1490 if (!(
TTI.isValidAddrSpaceCast(DstAS, SrcAS) ||
1491 TTI.isValidAddrSpaceCast(SrcAS, DstAS))) {
1496 dbgs() <<
"Do not know how to expand memmove between different "
1497 "address spaces\n");
1504 Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1505 SrcIsVolatile, DstIsVolatile,
TTI);
1508 Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
1509 SrcIsVolatile, DstIsVolatile,
TTI);
1516 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memset);
1555 getAverageMemOpLoopTripCount(*Memset));
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF)
static Value * createMemSetSplat(const DataLayout &DL, IRBuilderBase &B, Value *SetValue, Type *DstType)
Create a Value of DstType that consists of a sequence of copies of SetValue, using bitcasts and a vector splat.
static std::pair< Value *, Value * > tryInsertCastToCommonAddrSpace(IRBuilderBase &B, Value *Addr1, Value *Addr2, const TargetTransformInfo &TTI)
static void createMemSetPatternLoop(Instruction *InsertBefore, Value *DstAddr, Value *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static bool canOverlap(MemTransferBase< T > *Memcpy, ScalarEvolution *SE)
static void createMemMoveLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
static void createMemSetLoopUnknownSize(Instruction *InsertBefore, Value *DstAddr, Value *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static Value * getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal)
static void createMemSetLoopKnownSize(Instruction *InsertBefore, Value *DstAddr, ConstantInt *Len, Value *SetValue, Align DstAlign, bool IsVolatile, const TargetTransformInfo *TTI, std::optional< uint64_t > AverageTripCount)
static Value * getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal, Value *RTLoopRemainder=nullptr)
static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore, Value *Len, unsigned MainLoopStep, unsigned ResidualLoopStep, StringRef BBNamePrefix, std::optional< uint64_t > ExpectedUnits)
Insert the control flow and loop counters for a memcpy/memset loop expansion.
static void createMemMoveLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
This file contains the declarations for profiling metadata utility functions.
This class represents any memcpy intrinsic, i.e. llvm.element.unordered.atomic.memcpy and llvm.memcpy.
uint32_t getElementSizeInBytes() const
LLVM Basic Block Representation.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
A parsed version of the target data layout string, and methods for querying it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Common base class shared among various IRBuilders.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
UnreachableInst * CreateUnreachable()
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the debug location of the next non-debug node.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
LLVM_ABI MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.experimental.memset.pattern intrinsic.
Common base class for all memory transfer intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS, and RHS in the given Context.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
constexpr ScalarTy getFixedValue() const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< uint32_t > AtomicCpySize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
FunctionAddr VTableAddr Value
cl::opt< bool > ProfcheckDisableMetadataFixes
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI)
Expand MemMove as a loop.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and return it if Inst is annotated with value profile data.
LLVM_ABI void expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE)
Expand AtomicMemCpy as a loop. AtomicMemCpy is not deleted.
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSet as a loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet, const TargetTransformInfo *TTI=nullptr)
Expand MemSetPattern as a loop.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< unsigned > AtomicSize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of llvm.memcpy where the size is not a compile-time constant.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.