114#include <type_traits>
120#define DEBUG_TYPE "load-store-vectorizer"
122STATISTIC(NumVectorInstructions,
"Number of vector accesses generated");
123STATISTIC(NumScalarsVectorized,
"Number of scalar accesses vectorized");
133 std::tuple<
const Value * ,
139 const EqClassKey &K) {
142 <<
" of element size " << ElementSize <<
" bits in addrspace "
159 APInt OffsetFromLeader;
160 ChainElem(Instruction *Inst, APInt OffsetFromLeader)
161 : Inst(std::
move(Inst)), OffsetFromLeader(std::
move(OffsetFromLeader)) {}
165void sortChainInBBOrder(Chain &
C) {
166 sort(
C, [](
auto &
A,
auto &
B) {
return A.Inst->comesBefore(
B.Inst); });
169void sortChainInOffsetOrder(Chain &
C) {
170 sort(
C, [](
const auto &
A,
const auto &
B) {
171 if (
A.OffsetFromLeader !=
B.OffsetFromLeader)
172 return A.OffsetFromLeader.slt(
B.OffsetFromLeader);
173 return A.Inst->comesBefore(
B.Inst);
178 for (
const auto &
E :
C) {
179 dbgs() <<
" " << *
E.Inst <<
" (offset " <<
E.OffsetFromLeader <<
")\n";
183using EquivalenceClassMap =
187constexpr unsigned StackAdjustedAlignment = 4;
191 for (
const ChainElem &
E :
C)
198 return LI !=
nullptr && LI->
hasMetadata(LLVMContext::MD_invariant_load);
208 while (!Worklist.
empty()) {
211 for (
int Idx = 0; Idx < NumOperands; Idx++) {
213 if (!IM || IM->
getOpcode() == Instruction::PHI)
221 assert(IM !=
I &&
"Unexpected cycle while re-ordering instructions");
224 InstructionsToMove.
insert(IM);
231 for (
auto BBI =
I->getIterator(),
E =
I->getParent()->end(); BBI !=
E;) {
233 if (!InstructionsToMove.
contains(IM))
245 TargetTransformInfo &TTI;
246 const DataLayout &DL;
257 DenseSet<Instruction *> ExtraElements;
260 Vectorizer(Function &F,
AliasAnalysis &AA, AssumptionCache &AC,
261 DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
262 : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
263 DL(F.getDataLayout()), Builder(SE.
getContext()) {}
268 static const unsigned MaxDepth = 3;
277 bool runOnEquivalenceClass(
const EqClassKey &EqClassKey,
283 bool runOnChain(Chain &
C);
289 std::vector<Chain> splitChainByContiguity(Chain &
C);
295 std::vector<Chain> splitChainByMayAliasInstrs(Chain &
C);
299 std::vector<Chain> splitChainByAlignment(Chain &
C);
303 bool vectorizeChain(Chain &
C);
306 std::optional<APInt> getConstantOffset(
Value *PtrA,
Value *PtrB,
307 Instruction *ContextInst,
309 std::optional<APInt> getConstantOffsetComplexAddrs(
Value *PtrA,
Value *PtrB,
310 Instruction *ContextInst,
312 std::optional<APInt> getConstantOffsetSelects(
Value *PtrA,
Value *PtrB,
313 Instruction *ContextInst,
319 Type *getChainElemTy(
const Chain &
C);
328 template <
bool IsLoadChain>
330 Instruction *ChainElem, Instruction *ChainBegin,
331 const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
332 BatchAAResults &BatchAA);
337 void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses)
const;
358 bool accessIsAllowedAndFast(
unsigned SizeBytes,
unsigned AS, Align Alignment,
359 unsigned VecElemBits)
const;
365 ChainElem createExtraElementAfter(
const ChainElem &PrevElem,
Type *Ty,
366 APInt
Offset, StringRef Prefix,
367 Align Alignment =
Align());
372 FixedVectorType *VecTy);
376 void deleteExtraElements();
379class LoadStoreVectorizerLegacyPass :
public FunctionPass {
383 LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {}
387 StringRef getPassName()
const override {
388 return "GPU Load and Store Vectorizer";
391 void getAnalysisUsage(AnalysisUsage &AU)
const override {
403char LoadStoreVectorizerLegacyPass::ID = 0;
406 "Vectorize load and Store instructions",
false,
false)
414 "Vectorize load and store instructions",
false,
false)
417 return new LoadStoreVectorizerLegacyPass();
420bool LoadStoreVectorizerLegacyPass::runOnFunction(
Function &
F) {
422 if (skipFunction(
F) ||
F.hasFnAttribute(Attribute::NoImplicitFloat))
425 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
426 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
427 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
428 TargetTransformInfo &
TTI =
429 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
431 AssumptionCache &AC =
432 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
434 return Vectorizer(
F, AA, AC, DT, SE,
TTI).run();
440 if (
F.hasFnAttribute(Attribute::NoImplicitFloat))
455bool Vectorizer::run() {
482 for (
auto It = Barriers.
begin(), End = std::prev(Barriers.
end()); It != End;
484 Changed |= runOnPseudoBB(*It, *std::next(It));
497 I->eraseFromParent();
501 deleteExtraElements();
510 dbgs() <<
"LSV: Running on pseudo-BB [" << *Begin <<
" ... ";
511 if (End != Begin->getParent()->end())
514 dbgs() <<
"<BB end>";
519 for (
const auto &[EqClassKey, EqClass] :
520 collectEquivalenceClasses(Begin, End))
521 Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
526bool Vectorizer::runOnEquivalenceClass(
const EqClassKey &EqClassKey,
531 dbgs() <<
"LSV: Running on equivalence class of size " << EqClass.
size()
532 <<
" keyed on " << EqClassKey <<
":\n";
533 for (Instruction *
I : EqClass)
534 dbgs() <<
" " << *
I <<
"\n";
537 std::vector<Chain> Chains = gatherChains(EqClass);
539 <<
" nontrivial chains.\n";);
540 for (Chain &
C : Chains)
545bool Vectorizer::runOnChain(Chain &
C) {
547 dbgs() <<
"LSV: Running on chain with " <<
C.size() <<
" instructions:\n";
558 for (
auto &
C : splitChainByMayAliasInstrs(
C))
559 for (
auto &
C : splitChainByContiguity(
C))
560 for (
auto &
C : splitChainByAlignment(
C))
565std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &
C) {
569 sortChainInBBOrder(
C);
572 dbgs() <<
"LSV: splitChainByMayAliasInstrs considering chain:\n";
580 for (
const auto &
E :
C)
581 ChainOffsets.insert({&*
E.Inst,
E.OffsetFromLeader});
585 BatchAAResults BatchAA(AA);
598 auto Impl = [&](
auto IsLoad) {
600 auto [ChainBegin, ChainEnd] = [&](
auto IsLoad) {
601 if constexpr (IsLoad())
602 return std::make_pair(
C.begin(),
C.end());
604 return std::make_pair(
C.rbegin(),
C.rend());
606 assert(ChainBegin != ChainEnd);
608 std::vector<Chain> Chains;
611 for (
auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
613 ChainOffsets, BatchAA)) {
614 LLVM_DEBUG(
dbgs() <<
"LSV: No intervening may-alias instrs; can merge "
615 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst
620 dbgs() <<
"LSV: Found intervening may-alias instrs; cannot merge "
621 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst <<
"\n");
622 if (NewChain.
size() > 1) {
624 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
627 Chains.emplace_back(std::move(NewChain));
634 if (NewChain.
size() > 1) {
636 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
639 Chains.emplace_back(std::move(NewChain));
645 return Impl(std::bool_constant<true>());
648 return Impl(std::bool_constant<false>());
651std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &
C) {
655 sortChainInOffsetOrder(
C);
658 dbgs() <<
"LSV: splitChainByContiguity considering chain:\n";
672 Align OptimisticAlign =
Align(MaxVecRegBits / 8);
673 unsigned int MaxVectorNumElems =
674 MaxVecRegBits /
DL.getTypeSizeInBits(ElementType);
681 FixedVectorType *OptimisticVectorType =
693 APInt OffsetOfBestAlignedElemFromLeader =
C[0].OffsetFromLeader;
694 for (
const auto &
E :
C) {
696 if (ElementAlignment > BestAlignedElemAlign) {
697 BestAlignedElemAlign = ElementAlignment;
698 OffsetOfBestAlignedElemFromLeader =
E.OffsetFromLeader;
702 auto DeriveAlignFromBestAlignedElem = [&](APInt NewElemOffsetFromLeader) {
704 BestAlignedElemAlign,
705 (NewElemOffsetFromLeader - OffsetOfBestAlignedElemFromLeader)
710 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
712 std::vector<Chain> Ret;
713 Ret.push_back({
C.front()});
715 unsigned ChainElemTyBits =
DL.getTypeSizeInBits(getChainElemTy(
C));
716 ChainElem &Prev =
C[0];
717 for (
auto It = std::next(
C.begin()), End =
C.end(); It != End; ++It) {
718 auto &CurChain = Ret.back();
722 APInt PrevReadEnd = Prev.OffsetFromLeader + PrevSzBytes;
727 8 * SzBytes % ChainElemTyBits == 0 &&
728 "Every chain-element size must be a multiple of the element size after "
730 APInt ReadEnd = It->OffsetFromLeader + SzBytes;
732 bool AreContiguous =
false;
733 if (It->OffsetFromLeader.sle(PrevReadEnd)) {
735 uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
736 if (8 * Overlap % ChainElemTyBits == 0)
737 AreContiguous =
true;
741 << (AreContiguous ?
"contiguous" :
"chain-breaker")
742 << *It->Inst <<
" (starts at offset "
743 << It->OffsetFromLeader <<
")\n");
751 bool GapFilled =
false;
752 if (!AreContiguous && TryFillGaps && PrevSzBytes == SzBytes) {
753 APInt GapSzBytes = It->OffsetFromLeader - PrevReadEnd;
754 if (GapSzBytes == PrevSzBytes) {
756 ChainElem NewElem = createExtraElementAfter(
758 DeriveAlignFromBestAlignedElem(PrevReadEnd));
759 CurChain.push_back(NewElem);
765 if ((GapSzBytes == 2 * PrevSzBytes) && (CurChain.size() % 4 == 1)) {
766 ChainElem NewElem1 = createExtraElementAfter(
768 DeriveAlignFromBestAlignedElem(PrevReadEnd));
769 ChainElem NewElem2 = createExtraElementAfter(
771 DeriveAlignFromBestAlignedElem(PrevReadEnd + PrevSzBytes));
772 CurChain.push_back(NewElem1);
773 CurChain.push_back(NewElem2);
778 if (AreContiguous || GapFilled)
779 CurChain.push_back(*It);
781 Ret.push_back({*It});
785 if (ReadEnd.
sge(PrevReadEnd))
790 llvm::erase_if(Ret, [](
const auto &Chain) {
return Chain.size() <= 1; });
794Type *Vectorizer::getChainElemTy(
const Chain &
C) {
807 if (
any_of(
C, [](
const ChainElem &
E) {
810 return Type::getIntNTy(
815 for (
const ChainElem &
E :
C)
821std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &
C) {
834 sortChainInOffsetOrder(
C);
837 dbgs() <<
"LSV: splitChainByAlignment considering chain:\n";
842 auto GetVectorFactor = [&](
unsigned VF,
unsigned LoadStoreSize,
845 ChainSizeBytes, VecTy)
847 ChainSizeBytes, VecTy);
851 for (
const auto &
E :
C) {
854 "Should have filtered out non-power-of-two elements in "
855 "collectEquivalenceClasses.");
865 bool CandidateChainsMayContainExtraLoadsStores =
any_of(
866 C, [
this](
const ChainElem &
E) {
return ExtraElements.
contains(
E.Inst); });
868 std::vector<Chain> Ret;
869 for (
unsigned CBegin = 0; CBegin <
C.size(); ++CBegin) {
877 APInt PrevReadEnd =
C[CBegin].OffsetFromLeader + Sz;
878 for (
unsigned CEnd = CBegin + 1,
Size =
C.size(); CEnd <
Size; ++CEnd) {
879 APInt ReadEnd =
C[CEnd].OffsetFromLeader +
881 unsigned BytesAdded =
882 PrevReadEnd.
sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
884 if (Sz > VecRegBytes)
886 CandidateChains.emplace_back(CEnd, Sz);
891 for (
auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
893 auto [CEnd, SizeBytes] = *It;
895 dbgs() <<
"LSV: splitChainByAlignment considering candidate chain ["
896 << *
C[CBegin].Inst <<
" ... " << *
C[CEnd].Inst <<
"]\n");
898 Type *VecElemTy = getChainElemTy(
C);
902 unsigned VecElemBits =
DL.getTypeSizeInBits(VecElemTy);
905 assert((8 * SizeBytes) % VecElemBits == 0);
906 unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
908 unsigned VF = 8 * VecRegBytes / VecElemBits;
911 unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
912 VecElemBits * NumVecElems / 8, VecTy);
913 if (TargetVF != VF && TargetVF < NumVecElems) {
915 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
917 << TargetVF <<
" != VF=" << VF
918 <<
" and TargetVF < NumVecElems=" << NumVecElems <<
"\n");
932 bool IsAllocaAccess = AS ==
DL.getAllocaAddrSpace() &&
935 Align PrefAlign =
Align(StackAdjustedAlignment);
936 if (IsAllocaAccess && Alignment.
value() % SizeBytes != 0 &&
937 accessIsAllowedAndFast(SizeBytes, AS, PrefAlign, VecElemBits)) {
939 PtrOperand, PrefAlign,
DL,
C[CBegin].Inst,
nullptr, &DT);
940 if (NewAlign >= Alignment) {
942 <<
"LSV: splitByChain upgrading alloca alignment from "
943 << Alignment.
value() <<
" to " << NewAlign.
value()
945 Alignment = NewAlign;
949 Chain ExtendingLoadsStores;
950 if (!accessIsAllowedAndFast(SizeBytes, AS, Alignment, VecElemBits)) {
954 bool AllowedAndFast =
false;
959 assert(VecElemBits % 8 == 0);
960 unsigned VecElemBytes = VecElemBits / 8;
962 unsigned NewSizeBytes = VecElemBytes * NewNumVecElems;
965 "TargetVF expected to be a power of 2");
966 assert(NewNumVecElems <= TargetVF &&
967 "Should not extend past TargetVF");
970 <<
"LSV: attempting to extend chain of " << NumVecElems
971 <<
" " << (IsLoadChain ?
"loads" :
"stores") <<
" to "
972 << NewNumVecElems <<
" elements\n");
973 bool IsLegalToExtend =
983 if (IsLegalToExtend &&
984 accessIsAllowedAndFast(NewSizeBytes, AS, Alignment,
987 <<
"LSV: extending " << (IsLoadChain ?
"load" :
"store")
988 <<
" chain of " << NumVecElems <<
" "
989 << (IsLoadChain ?
"loads" :
"stores")
990 <<
" with total byte size of " << SizeBytes <<
" to "
991 << NewNumVecElems <<
" "
992 << (IsLoadChain ?
"loads" :
"stores")
993 <<
" with total byte size of " << NewSizeBytes
994 <<
", TargetVF=" << TargetVF <<
" \n");
1000 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
1001 for (
unsigned I = 0;
I < (NewNumVecElems - NumVecElems);
I++) {
1002 ChainElem NewElem = createExtraElementAfter(
1003 C[CBegin], VecElemTy,
1004 APInt(ASPtrBits, SizeBytes +
I * VecElemBytes),
"Extend");
1005 ExtendingLoadsStores.push_back(NewElem);
1009 SizeBytes = NewSizeBytes;
1010 NumVecElems = NewNumVecElems;
1011 AllowedAndFast =
true;
1014 if (!AllowedAndFast) {
1017 <<
"LSV: splitChainByAlignment discarding candidate chain "
1018 "because its alignment is not AllowedAndFast: "
1019 << Alignment.
value() <<
"\n");
1029 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
1030 "because !isLegalToVectorizeLoad/StoreChain.");
1034 if (CandidateChainsMayContainExtraLoadsStores) {
1046 [
this](
const ChainElem &
E) {
1050 if (CurrCandContainsExtraLoadsStores &&
1058 <<
"LSV: splitChainByAlignment discarding candidate chain "
1059 "because it contains extra loads/stores that we cannot "
1060 "legally vectorize into a masked load/store \n");
1067 for (
unsigned I = CBegin;
I <= CEnd; ++
I)
1068 NewChain.emplace_back(
C[
I]);
1069 for (ChainElem
E : ExtendingLoadsStores)
1070 NewChain.emplace_back(
E);
1078bool Vectorizer::vectorizeChain(Chain &
C) {
1083 C, [
this](
const ChainElem &
E) {
return ExtraElements.
contains(
E.Inst); });
1087 if (
C.size() == 2 && ChainContainsExtraLoadsStores)
1090 sortChainInOffsetOrder(
C);
1093 dbgs() <<
"LSV: Vectorizing chain of " <<
C.size() <<
" instructions:\n";
1097 Type *VecElemTy = getChainElemTy(
C);
1101 APInt PrevReadEnd =
C[0].OffsetFromLeader + BytesAdded;
1102 unsigned ChainBytes = BytesAdded;
1103 for (
auto It = std::next(
C.begin()), End =
C.end(); It != End; ++It) {
1105 APInt ReadEnd = It->OffsetFromLeader + SzBytes;
1108 PrevReadEnd.
sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
1109 ChainBytes += BytesAdded;
1113 assert(8 * ChainBytes %
DL.getTypeSizeInBits(VecElemTy) == 0);
1116 unsigned NumElem = 8 * ChainBytes /
DL.getTypeSizeInBits(VecElemTy);
1122 if (AS ==
DL.getAllocaAddrSpace()) {
1123 Alignment = std::max(
1126 MaybeAlign(),
DL,
C[0].Inst,
nullptr, &DT));
1131 for (
const ChainElem &
E :
C)
1133 DL.getTypeStoreSize(VecElemTy));
1142 return A.Inst->comesBefore(
B.Inst);
1147 if (ChainContainsExtraLoadsStores) {
1164 for (
const ChainElem &
E :
C) {
1169 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
1170 unsigned VecIdx = 8 * EOffset /
DL.getTypeSizeInBits(VecElemTy);
1181 if (
V->getType() !=
I->getType())
1209 return A.Inst->comesBefore(
B.Inst);
1214 auto InsertElem = [&](
Value *
V,
unsigned VecIdx) {
1215 if (
V->getType() != VecElemTy)
1219 for (
const ChainElem &
E :
C) {
1222 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
1223 unsigned VecIdx = 8 * EOffset /
DL.getTypeSizeInBits(VecElemTy);
1224 if (FixedVectorType *VT =
1226 for (
int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
1232 InsertElem(
I->getValueOperand(), VecIdx);
1238 if (ChainContainsExtraLoadsStores) {
1255 for (
const ChainElem &
E :
C)
1256 ToErase.emplace_back(
E.Inst);
1258 ++NumVectorInstructions;
1259 NumScalarsVectorized +=
C.size();
1263template <
bool IsLoadChain>
1264bool Vectorizer::isSafeToMove(
1265 Instruction *ChainElem, Instruction *ChainBegin,
1266 const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
1267 BatchAAResults &BatchAA) {
1268 LLVM_DEBUG(
dbgs() <<
"LSV: isSafeToMove(" << *ChainElem <<
" -> "
1269 << *ChainBegin <<
")\n");
1272 if (ChainElem == ChainBegin)
1280 auto BBIt = std::next([&] {
1281 if constexpr (IsLoadChain)
1286 auto BBItEnd = std::next([&] {
1287 if constexpr (IsLoadChain)
1293 const APInt &ChainElemOffset = ChainOffsets.
at(ChainElem);
1294 const unsigned ChainElemSize =
1297 for (; BBIt != BBItEnd; ++BBIt) {
1300 if (!
I->mayReadOrWriteMemory())
1317 if (
auto OffsetIt = ChainOffsets.
find(
I); OffsetIt != ChainOffsets.
end()) {
1324 const APInt &IOffset = OffsetIt->second;
1326 if (IOffset == ChainElemOffset ||
1327 (IOffset.
sle(ChainElemOffset) &&
1328 (IOffset + IElemSize).sgt(ChainElemOffset)) ||
1329 (ChainElemOffset.sle(IOffset) &&
1330 (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
1337 dbgs() <<
"LSV: Found alias in chain: " << *
I <<
"\n";
1349 <<
" Aliasing instruction:\n"
1350 <<
" " << *
I <<
'\n'
1351 <<
" Aliased instruction and pointer:\n"
1352 <<
" " << *ChainElem <<
'\n'
1374 switch (
I->getOpcode()) {
1377 case Instruction::Add:
1379 case Instruction::Or:
1381 return PDI->isDisjoint();
1389 unsigned MatchingOpIdxB,
bool Signed) {
1390 LLVM_DEBUG(
dbgs() <<
"LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
1391 <<
", AddOpA=" << *AddOpA <<
", MatchingOpIdxA="
1392 << MatchingOpIdxA <<
", AddOpB=" << *AddOpB
1393 <<
", MatchingOpIdxB=" << MatchingOpIdxB
1394 <<
", Signed=" <<
Signed <<
"\n");
1414 Value *OtherOperandA = AddOpA->
getOperand(MatchingOpIdxA == 1 ? 0 : 1);
1415 Value *OtherOperandB = AddOpB->
getOperand(MatchingOpIdxB == 1 ? 0 : 1);
1419 if (OtherInstrB &&
isAddLike(OtherInstrB) &&
1424 if (OtherInstrB->
getOperand(0) == OtherOperandA &&
1429 if (OtherInstrA &&
isAddLike(OtherInstrA) &&
1434 if (OtherInstrA->
getOperand(0) == OtherOperandB &&
1440 if (OtherInstrA && OtherInstrB &&
isAddLike(OtherInstrA) &&
1457std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
1459 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
1460 <<
" PtrB=" << *PtrB <<
" ContextInst=" << *ContextInst
1461 <<
" Depth=" <<
Depth <<
"\n");
1465 return getConstantOffsetSelects(PtrA, PtrB, ContextInst,
Depth);
1469 if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
1470 GEPA->getPointerOperand() != GEPB->getPointerOperand() ||
1471 GEPA->getSourceElementType() != GEPB->getSourceElementType())
1472 return std::nullopt;
1475 for (
unsigned I = 0,
E = GEPA->getNumIndices() - 1;
I <
E; ++
I) {
1477 return std::nullopt;
1486 return std::nullopt;
1492 return std::nullopt;
1500 return std::nullopt;
1502 const SCEV *OffsetSCEVA = SE.
getSCEV(ValA);
1503 const SCEV *OffsetSCEVB = SE.
getSCEV(OpB);
1504 const SCEV *IdxDiffSCEV = SE.
getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1506 return std::nullopt;
1510 return std::nullopt;
1513 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
1534 for (
unsigned MatchingOpIdxA : {0, 1})
1535 for (
unsigned MatchingOpIdxB : {0, 1})
1556 APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.
getBitWidth());
1559 Safe = BitsAllowedToBeSet.
uge(IdxDiff.
abs());
1567 Value *CheckVal = IdxDiff.
sge(0) ? ValA : OpB;
1575 return IdxDiff * Stride;
1576 return std::nullopt;
1579std::optional<APInt> Vectorizer::getConstantOffsetSelects(
1581 if (
Depth++ == MaxDepth)
1582 return std::nullopt;
1586 if (SelectA->getCondition() != SelectB->getCondition())
1587 return std::nullopt;
1588 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetSelects, PtrA=" << *PtrA
1589 <<
", PtrB=" << *PtrB <<
", ContextInst="
1590 << *ContextInst <<
", Depth=" <<
Depth <<
"\n");
1591 std::optional<APInt> TrueDiff = getConstantOffset(
1592 SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst,
Depth);
1594 return std::nullopt;
1595 std::optional<APInt> FalseDiff =
1596 getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
1597 ContextInst,
Depth);
1598 if (TrueDiff == FalseDiff)
1602 return std::nullopt;
1605void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses)
const {
1606 if (EQClasses.size() < 2)
1611 static_assert(std::tuple_size_v<EqClassKey> == 4,
1612 "EqClassKey has changed - EqClassReducedKey needs changes too");
1613 using EqClassReducedKey =
1614 std::tuple<std::tuple_element_t<1, EqClassKey> ,
1615 std::tuple_element_t<2, EqClassKey> ,
1616 std::tuple_element_t<3, EqClassKey> >;
1617 using ECReducedKeyToUnderlyingObjectMap =
1618 MapVector<EqClassReducedKey,
1619 SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;
1624 ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
1625 bool FoundPotentiallyOptimizableEC =
false;
1626 for (
const auto &EC : EQClasses) {
1627 const auto &
Key =
EC.first;
1628 EqClassReducedKey RedKey{std::get<1>(
Key), std::get<2>(
Key),
1630 auto &UOMap = RedKeyToUOMap[RedKey];
1632 if (UOMap.size() > 1)
1633 FoundPotentiallyOptimizableEC =
true;
1635 if (!FoundPotentiallyOptimizableEC)
1639 dbgs() <<
"LSV: mergeEquivalenceClasses: before merging:\n";
1640 for (
const auto &EC : EQClasses) {
1641 dbgs() <<
" Key: {" <<
EC.first <<
"}\n";
1642 for (
const auto &Inst :
EC.second)
1643 dbgs() <<
" Inst: " << *Inst <<
'\n';
1647 dbgs() <<
"LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
1648 for (
const auto &RedKeyToUO : RedKeyToUOMap) {
1649 dbgs() <<
" Reduced key: {" << std::get<0>(RedKeyToUO.first) <<
", "
1650 << std::get<1>(RedKeyToUO.first) <<
", "
1651 <<
static_cast<int>(std::get<2>(RedKeyToUO.first)) <<
"} --> "
1652 << RedKeyToUO.second.size() <<
" underlying objects:\n";
1653 for (
auto UObject : RedKeyToUO.second)
1654 dbgs() <<
" " << *UObject <<
'\n';
1658 using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;
1661 auto GetUltimateTargets =
1662 [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
1663 UObjectToUObjectMap IndirectionMap;
1664 for (
const auto *UObject : UObjects) {
1665 const unsigned MaxLookupDepth = 1;
1667 if (UltimateTarget != UObject)
1668 IndirectionMap[UObject] = UltimateTarget;
1670 UObjectToUObjectMap UltimateTargetsMap;
1671 for (
const auto *UObject : UObjects) {
1673 auto It = IndirectionMap.find(Target);
1674 for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
1676 UltimateTargetsMap[UObject] =
Target;
1678 return UltimateTargetsMap;
1683 for (
auto &[RedKey, UObjects] : RedKeyToUOMap) {
1684 if (UObjects.size() < 2)
1686 auto UTMap = GetUltimateTargets(UObjects);
1687 for (
const auto &[UObject, UltimateTarget] : UTMap) {
1688 if (UObject == UltimateTarget)
1691 EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
1692 std::get<2>(RedKey)};
1693 EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
1694 std::get<2>(RedKey)};
1697 const auto &VecTo = EQClasses[KeyTo];
1698 const auto &VecFrom = EQClasses[KeyFrom];
1699 SmallVector<Instruction *, 8> MergedVec;
1700 std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
1701 std::back_inserter(MergedVec),
1702 [](Instruction *
A, Instruction *
B) {
1703 return A && B && A->comesBefore(B);
1705 EQClasses[KeyTo] = std::move(MergedVec);
1706 EQClasses.erase(KeyFrom);
1710 dbgs() <<
"LSV: mergeEquivalenceClasses: after merging:\n";
1711 for (
const auto &EC : EQClasses) {
1712 dbgs() <<
" Key: {" <<
EC.first <<
"}\n";
1713 for (
const auto &Inst :
EC.second)
1714 dbgs() <<
" Inst: " << *Inst <<
'\n';
1722 EquivalenceClassMap Ret;
1724 auto GetUnderlyingObject = [](
const Value *Ptr) ->
const Value * {
1733 return Sel->getCondition();
1744 if ((LI && !LI->
isSimple()) || (SI && !
SI->isSimple()))
1757 unsigned TySize =
DL.getTypeSizeInBits(Ty);
1758 if ((TySize % 8) != 0)
1772 unsigned VF = VecRegSize / TySize;
1777 (VecTy && !
isPowerOf2_32(
DL.getTypeSizeInBits(VecTy->getScalarType()))))
1781 if (TySize > VecRegSize / 2 ||
1785 Ret[{GetUnderlyingObject(Ptr), AS,
1791 mergeEquivalenceClasses(Ret);
1800 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
1804 for (
size_t I = 1;
I < Instrs.
size(); ++
I) {
1805 assert(Instrs[
I - 1]->comesBefore(Instrs[
I]));
1814 struct InstrListElem : ilist_node<InstrListElem>,
1815 std::pair<Instruction *, Chain> {
1816 explicit InstrListElem(Instruction *
I)
1819 struct InstrListElemDenseMapInfo {
1820 using PtrInfo = DenseMapInfo<InstrListElem *>;
1821 using IInfo = DenseMapInfo<Instruction *>;
1822 static InstrListElem *getEmptyKey() {
return PtrInfo::getEmptyKey(); }
1823 static InstrListElem *getTombstoneKey() {
1824 return PtrInfo::getTombstoneKey();
1826 static unsigned getHashValue(
const InstrListElem *
E) {
1827 return IInfo::getHashValue(
E->first);
1829 static bool isEqual(
const InstrListElem *
A,
const InstrListElem *
B) {
1830 if (
A == getEmptyKey() ||
B == getEmptyKey())
1831 return A == getEmptyKey() &&
B == getEmptyKey();
1832 if (
A == getTombstoneKey() ||
B == getTombstoneKey())
1833 return A == getTombstoneKey() &&
B == getTombstoneKey();
1834 return IInfo::isEqual(
A->first,
B->first);
1837 SpecificBumpPtrAllocator<InstrListElem>
Allocator;
1838 simple_ilist<InstrListElem> MRU;
1839 DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;
1844 for (Instruction *
I : Instrs) {
1845 constexpr int MaxChainsToTry = 64;
1847 bool MatchFound =
false;
1848 auto ChainIter = MRU.
begin();
1849 for (
size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.
end();
1851 if (std::optional<APInt>
Offset = getConstantOffset(
1855 (ChainIter->first->comesBefore(
I) ?
I : ChainIter->first))) {
1858 ChainIter->second.emplace_back(
I,
Offset.value());
1868 APInt ZeroOffset(ASPtrBits, 0);
1869 InstrListElem *
E =
new (
Allocator.Allocate()) InstrListElem(
I);
1870 E->second.emplace_back(
I, ZeroOffset);
1876 std::vector<Chain> Ret;
1877 Ret.reserve(Chains.
size());
1880 if (
E.second.size() > 1)
1881 Ret.emplace_back(std::move(
E.second));
1885std::optional<APInt> Vectorizer::getConstantOffset(
Value *PtrA,
Value *PtrB,
1886 Instruction *ContextInst,
1889 <<
", PtrB=" << *PtrB <<
", ContextInst= " << *ContextInst
1890 <<
", Depth=" <<
Depth <<
"\n");
1893 unsigned OrigBitWidth =
DL.getIndexTypeSizeInBits(PtrA->
getType());
1894 APInt OffsetA(OrigBitWidth, 0);
1895 APInt OffsetB(OrigBitWidth, 0);
1898 unsigned NewPtrBitWidth =
DL.getTypeStoreSizeInBits(PtrA->
getType());
1899 if (NewPtrBitWidth !=
DL.getTypeStoreSizeInBits(PtrB->
getType()))
1900 return std::nullopt;
1905 assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
1906 OffsetB.getSignificantBits() <= NewPtrBitWidth);
1908 OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
1909 OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
1911 return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
1916 LLVM_DEBUG(
dbgs() <<
"LSV: SCEV PtrB - PtrA =" << *DistScev <<
"\n");
1922 return (OffsetB - OffsetA + Dist).
sextOrTrunc(OrigBitWidth);
1925 if (std::optional<APInt> Diff =
1926 getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst,
Depth))
1927 return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
1928 .sextOrTrunc(OrigBitWidth);
1929 return std::nullopt;
1932bool Vectorizer::accessIsAllowedAndFast(
unsigned SizeBytes,
unsigned AS,
1934 unsigned VecElemBits)
const {
1936 if (Alignment.
value() % SizeBytes == 0)
1940 unsigned VectorizedSpeed = 0;
1942 F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
1943 if (!AllowsMisaligned) {
1945 dbgs() <<
"LSV: Access of " << SizeBytes <<
"B in addrspace " << AS
1946 <<
" with alignment " << Alignment.
value()
1947 <<
" is misaligned, and therefore can't be vectorized.\n");
1951 unsigned ElementwiseSpeed = 0;
1952 (
TTI).allowsMisalignedMemoryAccesses((
F).
getContext(), VecElemBits, AS,
1953 Alignment, &ElementwiseSpeed);
1954 if (VectorizedSpeed < ElementwiseSpeed) {
1955 LLVM_DEBUG(
dbgs() <<
"LSV: Access of " << SizeBytes <<
"B in addrspace "
1956 << AS <<
" with alignment " << Alignment.
value()
1957 <<
" has relative speed " << VectorizedSpeed
1958 <<
", which is lower than the elementwise speed of "
1960 <<
". Therefore this access won't be vectorized.\n");
1966ChainElem Vectorizer::createExtraElementAfter(
const ChainElem &Prev,
Type *Ty,
1967 APInt
Offset, StringRef Prefix,
1973 PrevLoad->getPointerOperand(), Builder.
getInt(
Offset), Prefix +
"GEP");
1974 LLVM_DEBUG(
dbgs() <<
"LSV: Extra GEP Created: \n" << *NewGep <<
"\n");
1981 LLVM_DEBUG(
dbgs() <<
"LSV: Extra GEP Created: \n" << *NewGep <<
"\n");
1991 ExtraElements.
insert(NewElement);
1993 APInt NewOffsetFromLeader = Prev.OffsetFromLeader +
Offset;
1996 <<
" OffsetFromLeader: " << NewOffsetFromLeader <<
"\n");
1997 return ChainElem{NewElement, NewOffsetFromLeader};
2001 FixedVectorType *VecTy) {
2007 for (
const ChainElem &
E :
C) {
2011 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
2014 if (FixedVectorType *VT =
2016 for (
unsigned J = 0; J < VT->getNumElements(); ++J)
2017 MaskElts[VecIdx + J] = Builder.
getInt1(
true);
2019 MaskElts[VecIdx] = Builder.
getInt1(
true);
2024void Vectorizer::deleteExtraElements() {
2025 for (
auto *ExtraElement : ExtraElements) {
2027 [[maybe_unused]]
bool Deleted =
2029 assert(
Deleted &&
"Extra Load should always be trivially dead");
2035 ExtraElement->eraseFromParent();
2040 ExtraElements.clear();
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
Module.h This file contains the declarations for the Module class.
static bool checkNoWrapFlags(Instruction *I, bool Signed)
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA, unsigned MatchingOpIdxA, Instruction *AddOpB, unsigned MatchingOpIdxB, bool Signed)
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
static bool isAddLike(const SDValue V)
static bool isInvariantLoad(const Instruction *I, const Value *Ptr, const bool IsKernelFn)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, const MachineInstr *Insert, const WebAssemblyFunctionInfo &MFI, const MachineRegisterInfo &MRI, bool Optimize)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool sle(const APInt &RHS) const
Signed less or equal comparison.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
int64_t getSExtValue() const
Get sign extended value.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff it does not:
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::reverse_iterator reverse_iterator
InstListType::iterator iterator
Instruction iterators...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
const APInt * getSingleElement() const
If this set contains a single element, return it, otherwise return null.
bool isSingleElement() const
Return true if this set contains exactly one member.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
ValueT & at(const_arg_type_t< KeyT > Val)
Return the entry for the specified key, or abort if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
Legacy wrapper pass to provide the GlobalsAAResult object.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos lives in, right before MovePos.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instruction comes before Other.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
An instruction for reading from memory.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Legacy wrapper pass to provide the SCEVAAResult object.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getCouldNotCompute()
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Value * getPointerOperand()
Analysis pass providing the TargetTransformInfo.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
TypeSize getSequentialElementStride(const DataLayout &DL) const
Value * getOperand() const
const ParentTy * getParent() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This class implements an extremely fast bulk output stream that can only output to a stream.
void push_front(reference Node)
Insert a node at the front; never copies.
void remove(reference N)
Remove a node by reference; never deletes.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
DXILDebugInfoMap run(Module &M)
ElementType
The element type of an SRV or UAV resource.
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
generic_gep_type_iterator<> gep_type_iterator
bool isModOrRefSet(const ModRefInfo MRI)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
auto post_order(const T &G)
Post-order traversal of a graph.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one of its successor blocks.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.