#include "llvm/IR/IntrinsicsHexagon.h"

#define DEBUG_TYPE "hexagon-vc"

#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
class HexagonVectorCombine {
      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;
  std::optional<APInt> getIntValue(const Value *Val) const;
  bool isTrue(const Value *Val) const;
  bool isFalse(const Value *Val) const;
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;
                  int Length, int Where) const;
                           unsigned ToWidth) const;
  std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
  unsigned getNumSignificantBits(const Value *V,
  template <typename T = std::vector<Instruction *>>
                             const T &IgnoreInsts = {}) const;
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
                         int Start, int Length) const;
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  using InstList = std::vector<Instruction *>;
    AddrInfo(const AddrInfo &) = default;
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}
    AddrInfo &operator=(const AddrInfo &) = default;

  using AddrList = std::vector<AddrInfo>;
      return A->comesBefore(B);

  using DepList = std::set<Instruction *, InstrLess>;
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
    MoveGroup() = default;

  using MoveList = std::vector<MoveGroup>;
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;

      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
          : Seg(Val, Off, Len), Pos(Pos) {}
    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }
    const Block &operator[](int i) const { return Blocks[i]; }

    std::vector<Block> Blocks;

    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;
                                const InstMap &CloneMap = InstMap()) const;
                               const InstMap &CloneMap = InstMap()) const;
  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool moveTogether(MoveGroup &Move) const;
  template <typename T>
  void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move) const;
                                int Alignment) const;

  std::map<Instruction *, AddrList> AddrGroups;
  const HexagonVectorCombine &HVC;
                               const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;
                               const AlignVectors::MoveGroup &MG) {
  OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
  OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
    OS << "  " << *I << '\n';
    OS << "  " << *I << '\n';
  for (auto [K, V] : MG.Clones) {
    K->printAsOperand(OS, false);
    OS << "\t-> " << *V << '\n';
  OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
  if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
    OS << "(self:" << B.Seg.Val << ')';
  } else if (B.Seg.Val != nullptr) {
                               const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS)
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, true);

    std::optional<unsigned> RoundAt;
      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
                        const FxpOp &Op) const -> Value *;
                    bool Rounding) const -> Value *;
                    bool Rounding) const -> Value *;
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
      -> std::pair<Value *, Value *>;

  const HexagonVectorCombine &HVC;
                               const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      OS << " + 1<<" << *Op.RoundAt;
  OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;

#if !defined(_MSC_VER) || _MSC_VER >= 1926
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
  for (auto i = map.begin(), e = map.end(); i != e;) {
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
auto AlignVectors::ByteSpan::extent() const -> int {
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);
    int Off = L > B.Pos ? L - B.Pos : 0;
    Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
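// Illustrative sketch (added commentary, not from the original source): a
// ByteSpan block records where a byte range of some value lands in a
// contiguous byte stream. A block {Val=V, Seg.Start=2, Seg.Size=4, Pos=10}
// says bytes 2..5 of V occupy stream positions 10..13. section(12, 4)
// intersects it and yields {Val=V, Seg.Start=4, Seg.Size=2, Pos=12}: the
// overlap [12,14) maps back to bytes 4..5 of V.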
auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {

  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      II->getParamAlign(0).valueOrOne());
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      II->getParamAlign(1).valueOrOne());
auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {

auto AlignVectors::getPayload(Value *Val) const -> Value * {
    ID = II->getIntrinsicID();
    return In->getOperand(0);
auto AlignVectors::getMask(Value *Val) const -> Value * {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(1);
    case Intrinsic::masked_store:
      return II->getArgOperand(2);

  Type *ValTy = getPayload(Val)->getType();
auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (II->getIntrinsicID() == Intrinsic::masked_load)
    return II->getArgOperand(2);
auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust,
                                         const InstMap &CloneMap) const
    if (Instruction *New = CloneMap.lookup(I))
  return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");
auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment,
                                        const InstMap &CloneMap) const
    for (auto [Old, New] : CloneMap)
      I->replaceUsesOfWith(Old, New);
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
  return Builder.CreateIntToPtr(
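// Worked example (added illustration): with Alignment = 128 (a power of 2),
// -Alignment is ...1110000000 in binary, so "ptr & -128" clears the low
// seven bits and rounds the address down to the nearest 128-byte boundary,
// e.g. 0x1234 & -128 == 0x1200.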
auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
         "Expecting scalar predicate");
  if (HVC.isFalse(Predicate))
  if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
    Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
                                       Alignment, MDSources);
    return Builder.CreateSelect(Mask, Load, PassThru);

  assert(!HVC.isUndef(Mask));
  if (HVC.isZero(Mask))
  if (HVC.isTrue(Mask))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
      Mask, PassThru, "mld");
auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
                                    Value *Ptr, int Alignment,
      Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
         "Predicated 'scalar' vector loads not yet supported");
  assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
  assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);

  auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
                                {Predicate, Ptr, HVC.getConstInt(0)}, {},
auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
         "Expecting scalar predicate"));
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
  if (HVC.isTrue(Mask)) {
      return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
      Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
  Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
                                         Predicate, Alignment, MDSources);
  Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
  return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
                                     Value *Ptr, int Alignment,
auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
         "Predicated 'scalar' vector stores not yet supported");
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);

  assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
  auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
                                {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");

  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
      if (I->getParent() == Parent && Base->comesBefore(I))
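// Note (added commentary): getUpwardDeps walks the use-def chains of In with
// a work queue and collects the instructions that live in the same block and
// sit between Base and In. These are the dependencies that would have to be
// cloned or moved along if In itself is hoisted up to Base.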
auto AlignVectors::createAddressGroups() -> bool {
  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    return std::make_pair(nullptr, 0);

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I);
      auto F = findBaseAndOffset(*AI);
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      AddrGroups[GroupInst].push_back(*AI);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
      G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  return !AddrGroups.empty();
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
             HVC.isSafeToClone(*I);
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    Move.Main.push_back(Info.Inst);

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);

  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
           "Not handling stores with return values");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
    Move.Main.push_back(Info.Inst);

  MoveList StoreGroups;
  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);

  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
  if (!VADoFullStores) {
    erase_if(StoreGroups, [this](const MoveGroup &G) {
      auto MaybeInfo = this->getAddrInfo(*S);
      assert(MaybeInfo.has_value());
      return HVC.HST.isHVXVectorType(
          EVT::getEVT(MaybeInfo->ValTy, false));
auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
    Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
    for (Instruction *M : Main) {
        M->moveAfter(Where);
      for (auto [Old, New] : Move.Clones)
        M->replaceUsesOfWith(Old, New);
    for (int i = 0, e = Move.Deps.size(); i != e; ++i)
      Move.Deps[i] = Move.Clones[Move.Deps[i]];
    assert(Move.Deps.empty());
    for (Instruction *M : Main.drop_front(1)) {
  return Move.Main.size() + Move.Deps.size() > 1;
template <typename T>
  for (Instruction *I : Insts) {
    assert(HVC.isSafeToClone(*I));
    C->setName(Twine("c.") + I->getName() + ".");
    C->insertBefore(To);
    for (auto [Old, New] : Map)
      C->replaceUsesOfWith(Old, New);
    Map.insert(std::make_pair(I, C));
auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
                                    const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  BasicBlock *BaseBlock = Builder.GetInsertBlock();

  for (int Index = 0; Index != NumSectors; ++Index)
    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);

  DenseMap<void *, Instruction *> EarliestUser;
    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);
  auto earliestUser = [&](const auto &Uses) {
    for (const Use &U : Uses) {
      assert(I != nullptr && "Load used in a non-instruction?");
      if (I->getParent() == BaseBlock) {
        User = std::min(User, I, isEarlier);

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      auto &EU = EarliestUser[S.Seg.Val];
      EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);

    dbgs() << "ASpan:\n" << ASpan << '\n';
    dbgs() << "Earliest users of ASpan:\n";
    for (auto &[Val, User] : EarliestUser) {
      dbgs() << Val << "\n ->" << *User << '\n';

  auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int Index, bool MakePred) {
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    int Width = (1 + DoAlign) * ScLen;
    return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
                            VSpan.section(Start, Width).values());

    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(&*In, &*To);
    InstMap Map = cloneBefore(In, Deps);
    for (auto [Old, New] : Map)
      In->replaceUsesOfWith(Old, New);

  for (int Index = 0; Index != NumSectors + 1; ++Index) {
        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
          createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
      if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
        moveBefore(Load->getIterator(), BasePos);
      LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');

  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
      assert(NextLoad != nullptr);
      Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      ASpan[Index].Seg.Val = Val;

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
    std::vector<ByteSpan::Block *> ABlocks;
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val != nullptr)
        ABlocks.push_back(&S);
        [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
    for (ByteSpan::Block *S : ABlocks) {
      Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
          HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);

    Type *ValTy = getPayload(B.Seg.Val)->getType();
        getPassThrough(B.Seg.Val), "sel");
auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
                                     const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  ByteSpan ASpanV, ASpanM;

  auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
    auto *VecTy = VectorType::get(Ty, 1, false);

  for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
        VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
          Pay->getType(), HVC.getByteTy());
      Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumM = Builder.CreateOr(AccumM, PartM);
      Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
    ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);

    dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';

  for (int Index = 1; Index != NumSectors + 2; ++Index) {
    Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
    Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
    ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
    ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);

    dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';

  auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
                         const ByteSpan &ASpanM, int Index, bool MakePred) {
    if (HVC.isUndef(Val) || HVC.isZero(Mask))
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    int Width = (1 + DoAlign) * ScLen;
    this->createStore(Builder, Val, Ptr, Predicate, ScLen,
                      HVC.vlsb(Builder, Mask),
                      VSpan.section(Start, Width).values());

  for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
    createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  auto getMaxOf = [](auto Range, auto GetValue) {
      return GetValue(A) < GetValue(B);

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);
  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

                                    InstSimplifyFolder(HVC.DL));
  Value *AlignAddr = nullptr;
  Value *AlignVal = nullptr;

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust, Move.Clones);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
        createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
                             MinNeeded.value(), Move.Clones);
        Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
    for (auto [Old, New] : Move.Clones)
      I->replaceUsesOfWith(Old, New);

  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
                : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

    dbgs() << "ScLen: " << ScLen << "\n";
    dbgs() << "AlignVal:" << *AlignVal << "\n";
    dbgs() << "AlignAddr:" << *AlignAddr << "\n";
    dbgs() << "VSpan:\n" << VSpan << '\n';

    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();
auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                                       int Alignment) const -> Value * {
  auto *AlignTy = AlignVal->getType();
      AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
  Value *Zero = ConstantInt::get(AlignTy, 0);
auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
  int Size = HVC.getSizeOf(Ty);
auto AlignVectors::run() -> bool {
  if (!createAddressGroups())

    dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
    for (auto &[In, AL] : AddrGroups) {
      for (const AddrInfo &AI : AL)
        dbgs() << "---\n" << AI << '\n';

  MoveList LoadGroups, StoreGroups;
  for (auto &G : AddrGroups) {

    dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
    for (const MoveGroup &G : LoadGroups)
      dbgs() << G << "\n";
    dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
    for (const MoveGroup &G : StoreGroups)
      dbgs() << G << "\n";

  unsigned CountLimit = VAGroupCountLimit;
  if (CountLimit == 0)
  if (LoadGroups.size() > CountLimit) {
    LoadGroups.resize(CountLimit);
    StoreGroups.clear();
    unsigned StoreLimit = CountLimit - LoadGroups.size();
    if (StoreGroups.size() > StoreLimit)
      StoreGroups.resize(StoreLimit);

  for (auto &M : LoadGroups)
  for (auto &M : StoreGroups)
  for (auto &M : LoadGroups)
  for (auto &M : StoreGroups)
auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);
  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0;
    NumToTest = Bits - 1;
  return {Bits, Sign};
auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {
auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();
    return std::nullopt;
  auto m_Shr = [](auto &&V, auto &&S) {
  if (Op.Frac > Width)
    return std::nullopt;
    return std::nullopt;
  Op.Opcode = Instruction::Mul;
  Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
  Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
  return std::nullopt;
auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  if (VecTy == nullptr)
  unsigned ElemWidth = ElemTy->getBitWidth();
  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
  if (ElemWidth <= 32 && Op.Frac == 0)

  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
                                    InstSimplifyFolder(HVC.DL));

  auto roundUpWidth = [](unsigned Width) -> unsigned {
    if (Width > 32 && Width % 32 != 0) {

  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);

  unsigned Width = std::max(BitsX, BitsY);
  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
  } else if (Width > ElemWidth) {
  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
  ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
inline bool HvxIdioms::matchScatter(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_scatter);

inline bool HvxIdioms::matchGather(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_gather);
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:

  assert(Ptr && "Unable to extract pointer");
  if (II->getIntrinsicID() == Intrinsic::masked_store)
    return II->getOperand(1);
                          HvxIdioms::DstQualifier &Qual) {
    Qual = HvxIdioms::LdSt;
  if (II->getIntrinsicID() == Intrinsic::masked_gather) {
    Qual = HvxIdioms::LLVM_Gather;
  } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
    Qual = HvxIdioms::LLVM_Scatter;
  } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
    Qual = HvxIdioms::LdSt;
  } else if (II->getIntrinsicID() ==
             Intrinsic::hexagon_V6_vgather_vscattermh) {
    Qual = HvxIdioms::HEX_Gather_Scatter;
  } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
    Qual = HvxIdioms::HEX_Scatter;
  } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
    Qual = HvxIdioms::HEX_Gather;
    Qual = HvxIdioms::Call;
    Qual = HvxIdioms::Arithmetic;
  for (auto &U : In->uses()) {
      Users.push_back(Destination);

  assert(In && "Bad instruction");
         "Not a gather Intrinsic");
  if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
  auto *Src = IE->getOperand(1);
  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");

  if (II->getIntrinsicID() == Intrinsic::masked_load)
    return II->getType();
  if (II->getIntrinsicID() == Intrinsic::masked_store)
    return II->getOperand(0)->getType();
  return In->getType();

  if (II->getIntrinsicID() == Intrinsic::masked_load)
  if (II->getIntrinsicID() == Intrinsic::masked_gather)
    return cstDataVector;
  return GEPIndex->getOperand(0);
  LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 64; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");
  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 128; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
                          unsigned int pattern) {
  std::vector<unsigned int> byteMask;
  for (unsigned i = 0; i < 32; ++i)
    byteMask.push_back(pattern);
  return Builder.CreateIntrinsic(
      {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
Value *HvxIdioms::processVScatter(Instruction &In) const {
  assert(InpTy && "Cannot handle non-vector type for llvm.scatter/gather");
  unsigned InpSize = HVC.getSizeOf(InpTy);
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  assert(ElemTy && "llvm.scatter needs integer type argument");
  unsigned Elements = HVC.length(InpTy);
  dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
  dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
         << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
         << ElemWidth << ")\n";
                                    InstSimplifyFolder(HVC.DL));
  auto *ValueToScatter = In.getOperand(0);
  LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
             << ") for vscatter\n");

  Value *CastIndex = nullptr;
  if (cstDataVector) {
    AllocaInst *IndexesAlloca =
        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
    [[maybe_unused]] auto *StoreIndexes =
        Builder.CreateStore(cstDataVector, IndexesAlloca);
    LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
                                      IndexesAlloca, "reload_index");
    CastIndex = Indexes;

  if (ElemWidth == 1) {
    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
    [[maybe_unused]] Value *IndexHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
    [[maybe_unused]] Value *IndexLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
    [[maybe_unused]] Value *UVSHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
    [[maybe_unused]] Value *UVSLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
  } else if (ElemWidth == 2) {
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
  } else if (ElemWidth == 4) {
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
Value *HvxIdioms::processVGather(Instruction &In) const {
  [[maybe_unused]] auto *InpTy =
  assert(InpTy && "Cannot handle non-vector type for llvm.gather");
  [[maybe_unused]] auto *ElemTy =
  assert(ElemTy && "llvm.gather needs vector of ptr argument");
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
             << *In.getParent() << "\n");
             << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
             << ") type(" << *ElemTy << ") Access alignment("
             << *In.getOperand(1) << ") AddressSpace("
             << ElemTy->getAddressSpace() << ")\n");
         "llvm.gather needs vector for mask");
                                    InstSimplifyFolder(HVC.DL));
  HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
  LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
    LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
  assert(DstType && "Cannot handle non-vector dst type for llvm.gather");

  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
    unsigned OutputSize = HVC.getSizeOf(DstType);
               << " Address space ("
               << " Result type : " << *DstType
               << "\n Size in bytes : " << OutputSize
               << " element type(" << *DstElemTy
               << ")\n ElemWidth : " << ElemWidth << " bytes\n");
    assert(IndexType && "Cannot handle non-vector index type for llvm.gather");
    unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

    if (ElemWidth == 1) {
      Value *CastIndexes =
      auto *UnpackedIndexes =
          V6_vunpack, CastIndexes, nullptr);
      [[maybe_unused]] Value *IndexHi =
          HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
      [[maybe_unused]] Value *IndexLo =
          HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
      auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
      auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
      LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
      LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
          NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
      [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
    } else if (ElemWidth == 2) {
      if (IndexWidth == 2) {
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
                   << " Shifted half index: " << *AdjustedIndex << ")\n");
            Type::getVoidTy(Ctx), V6_vgather,
        for (auto &U : Dst->uses()) {
          dbgs() << " dst used by: " << *UI << "\n";
        for (auto &U : In.uses()) {
          dbgs() << " In used by : " << *UI << "\n";
            HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
        LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
        In.replaceAllUsesWith(LoadedResult);
        dbgs() << " Unhandled index type for vgather\n";
    } else if (ElemWidth == 4) {
      if (IndexWidth == 4) {
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
                   << " Shifted word index: " << *AdjustedIndex << ")\n");
            Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
    Dst->eraseFromParent();
  } else if (Qual == HvxIdioms::LLVM_Scatter) {
    assert(DstInpTy && "Cannot handle non-vector type for llvm.scatter");
    unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
    unsigned DstElements = HVC.length(DstInpTy);
    assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
    dbgs() << " Gather feeds into scatter\n Values to scatter : "
           << *Dst->getOperand(0) << "\n";
    dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
           << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
           << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
    Value *AdjustedIndex = HVC.createHvxIntrinsic(
        Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
    LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
  } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
    LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        {ResultAlloca, CastedSrc,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
  } else if (Qual == HvxIdioms::HEX_Scatter) {
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::HEX_Gather) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
               << "\n AddressSpace: "
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        {ResultAlloca, CastedSrc,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::LLVM_Gather) {
    errs() << " Unimplemented vgather to vgather sequence\n";
auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
                                     const FxpOp &Op) const -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  unsigned Width = InpTy->getScalarSizeInBits();

  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    Value *QMul = nullptr;
      QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
    } else if (Width == 32) {
      QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
    if (QMul != nullptr)

  assert(Width < 32 || Width % 32 == 0);

  assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
  if (Op.Frac == 16) {
    if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
    Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
          ConstantInt::get(Prod32->getType(), 1ull << *Op.RoundAt);
      Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
            ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
            : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
    return Builder.CreateTrunc(Shifted, InpTy, "trn");

  auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
  auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
  auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);

  if (Op.RoundAt.has_value()) {
    RoundV[*Op.RoundAt / 32] =
        ConstantInt::get(HvxWordTy, 1ull << (*Op.RoundAt % 32));
    WordP = createAddLong(Builder, WordP, RoundV);

  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = ConstantInt::get(HvxWordTy, Op.Frac % 32);
  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    if (Src + 1 < End) {
  WordP.resize(WordP.size() - SkipWords);

  return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
  auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
  return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
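// Worked example (added illustration): Q15 values are 16-bit with 15
// fractional bits, so a*b needs a >>15 after the widening multiply. For
// 0.5 * 0.5: 0x4000 * 0x4000 = 0x10000000, and with rounding
// (0x10000000 + 0x4000) >> 15 = 0x2000, i.e. 0.25. V6_vmpyhvsrs does the
// multiply, round and saturating >>15 per element in one instruction.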
auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  Type *InpTy = X.Val->getType();
  assert(InpTy == Y.Val->getType());
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
  return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
                                {V1, X.Val, Y.Val});
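// Note (added commentary): a Q31 multiply wants the upper 32 bits of the
// 64-bit product shifted left by one. There is no single HVX instruction
// for that, so the partial products are assembled from an even multiply
// (V6_vmpyewuh) accumulated with an odd multiply (V6_vmpyo_acc).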
auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                               Value *CarryIn) const
    -> std::pair<Value *, Value *> {
  assert(X->getType() == Y->getType());
    if (CarryIn == nullptr)
    Args.push_back(CarryIn);
    Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
    return {Result, CarryOut};

  if (CarryIn != nullptr) {
    unsigned Width = VecTy->getScalarSizeInBits();
    for (unsigned i = 0, e = 32 / Width; i != e; ++i)
      Mask = (Mask << Width) | 1;
        HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
                               {CarryIn, HVC.getConstInt(Mask)});
    Result1 = Builder.CreateAdd(X, ValueIn, "add");
  return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
  std::tie(X, Y) = canonSgn(X, Y);
      HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
  Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);
    return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,

  Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);
  unsigned Len = HVC.length(HvxP16Ty) / 2;

  SmallVector<int, 128> PickOdd(Len);
  for (int i = 0; i != static_cast<int>(Len); ++i)
    PickOdd[i] = 2 * i + 1;

      HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HvxI32Ty);

  std::tie(X, Y) = canonSgn(X, Y);
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;

  Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
                                        {X.Val, Y.Val}, {HvxI32Ty});
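// Note (added commentary): canonSgn orders the operands so that only three
// sign combinations remain: (signed,signed), (unsigned,signed) and
// (unsigned,unsigned), matching the three V6_vmpy{ss,us,uu}_parts
// intrinsics selected above. Each returns the low and high 32-bit halves
// of the full 64-bit element products.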
  assert(WordX.size() == WordY.size());
  unsigned Idx = 0, Length = WordX.size();

    if (HVC.isZero(WordX[Idx]))
      Sum[Idx] = WordY[Idx];
    else if (HVC.isZero(WordY[Idx]))
      Sum[Idx] = WordX[Idx];

  Value *Carry = nullptr;
  for (; Idx != Length; ++Idx) {
    std::tie(Sum[Idx], Carry) =
        createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);

  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = nullptr;
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
                                              pop_back_or_zero(ProdJ), Carry);
        ProdJ.insert(ProdJ.begin(), Sum);

  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");
auto HvxIdioms::run() -> bool {
  for (BasicBlock &B : HVC.F) {
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        It->replaceAllUsesWith(New);
        It = StartOver ? B.rbegin()
      } else if (matchGather(*It)) {
          It->eraseFromParent();
      } else if (matchScatter(*It)) {
          It->eraseFromParent();
auto HexagonVectorCombine::run() -> bool {
    dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();

  if (HST.useHVXOps()) {
    Changed |= AlignVectors(*this).run();
    Changed |= HvxIdioms(*this).run();

    dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
           << " after HexagonVectorCombine\n"
auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  return VectorType::get(ByteTy, ElemCount, false);

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  return VectorType::get(BoolTy, ElemCount, false);
auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
    return C->isZeroValue();

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
    return CI->getValue();
  return std::nullopt;

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {

auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {

auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
         "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
  return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
  return getSizeOf(Val->getType(), Kind);

auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
  auto *NcTy = const_cast<Type *>(Ty);
    return DL.getTypeStoreSize(NcTy).getFixedValue();
    return DL.getTypeAllocSize(NcTy).getFixedValue();
auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();

auto HexagonVectorCombine::length(Value *Val) const -> size_t {
  return length(Val->getType());

auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
  assert(VecTy && "Must be a vector type");
  return VecTy->getElementCount().getFixedValue();
auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
  SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
                                   int Where) const -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);

  Value *P2Src = vresize(Builder, Src, P2Len, Poison);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);

  for (int i = 0; i != P2Len; ++i) {
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;

  return vresize(Builder, P2Insert, DstLen, Poison);
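// Worked example (added illustration): insertb overwrites Length bytes of
// Dst starting at byte Where with bytes [Start, Start+Length) of Src, using
// one shuffle over the pair {Dst, Src}. With Dst = d0..d7, Src = s0..s7,
// Start = 2, Length = 3, Where = 4, the mask above selects
// d0 d1 d2 d3 s2 s3 s4 d7 (indices >= P2Len address bytes of Src).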
auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),

  if (HST.isTypeForHVX(Hi->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
                              Hi->getType(), {Hi, Lo, Amt});

      Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
  return vralignb(Builder, Lo, Hi, Sub);
auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
                              Lo->getType(), {Hi, Lo, Amt});

      Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
  Type *Int64Ty = Type::getInt64Ty(F.getContext());
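// Worked example (added illustration): both align helpers treat {Hi, Lo} as
// one double-length byte vector. With 8-byte vectors Lo = l0..l7 and
// Hi = h0..h7, vralignb(Lo, Hi, 3) starts 3 bytes into the pair and yields
// l3 l4 l5 l6 l7 h0 h1 h2, while vlalignb(Lo, Hi, 3) starts at
// VecLen - 3 = 5 and yields l5 l6 l7 h0 h1 h2 h3 h4.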
auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;

  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    SMask.resize(length(Ty) * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
          Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
      Work[OtherW].push_back(Joined);

  SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);
auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = length(ValTy);
  if (CurSize == NewSize)
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, Val, 0, NewSize);

  SmallVector<int, 128> SMask(NewSize);
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
  Type *FromSTy = FromTy->getScalarType();
  Type *ToSTy = ToTy->getScalarType();
  if (FromSTy == ToSTy)

  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

  int FromCount = length(MaskTy);
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);

  auto *FromITy = getIntTy(FromSize * 8);
  auto *ToITy = getIntTy(ToSize * 8);
      Mask, VectorType::get(FromITy, FromCount, false), "sxt");
      Ext, VectorType::get(ToITy, ToCount, false), "cst");
      Cast, VectorType::get(getBoolTy(), ToCount, false), "trn");
auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
  if (ScalarTy == getBoolTy())

  Value *Bytes = vbytes(Builder, Val);
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
  return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");
auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
  if (ScalarTy == getByteTy())
  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
  return Builder.CreateSExt(Val, getByteTy(), "sxt");
auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
                                     unsigned Start, unsigned Length) const
  return getElementRange(Builder, Val, Val, Start, Length);

auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, 0, Len / 2);

auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, Len / 2, Len / 2);
auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);
  for (int i = 0; i != Len; ++i) {

auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);
  for (int i = 0; i != Len; ++i) {
    Mask[2 * i + 0] = i;
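// Worked example (added illustration): for Val0 = a0 a1 a2 a3 and
// Val1 = b0 b1 b2 b3, vshuff interleaves to a0 b0 a1 b1 a2 b2 a3 b3, while
// vdeal is its inverse: it splits an interleaved pair back into the
// even-index elements followed by the odd-index elements.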
auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
  auto getCast = [&](IRBuilderBase &Builder, Value *Val,
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
    assert(HST.isTypeForHVX(SrcTy, true));
    Type *BoolTy = Type::getInt1Ty(F.getContext());

    unsigned HwLen = HST.getVectorLength();
    Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
                                   : Intrinsic::hexagon_V6_pred_typecast_128B;

  for (int i = 0, e = Args.size(); i != e; ++i) {
    Type *T = IntrTy->getParamType(i);
    if (A->getType() != T) {

  StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
  CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);

  if (RetTy == nullptr || CallTy == RetTy)
  assert(HST.isTypeForHVX(CallTy, true));
  return getCast(Builder, Call, RetTy);
auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
                                               unsigned ToWidth) const
  assert(VecTy->getElementType()->isIntegerTy());
  unsigned FromWidth = VecTy->getScalarSizeInBits();
  assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
  unsigned NumResults = FromWidth / ToWidth;
  unsigned Length = length(VecTy);

  auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
    if (Begin + 1 == End)
    auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
    Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

    unsigned Half = (Begin + End) / 2;
    Results[Begin] = sublo(Builder, Res);
    Results[Half] = subhi(Builder, Res);

    splitFunc(Begin, Half, splitFunc);
    splitFunc(Half, End, splitFunc);

  splitInHalf(0, NumResults, splitInHalf);
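// Worked example (added illustration): splitting a vector of i32 down to
// i16 elements bitcasts it to twice as many i16 lanes and vdeals the
// halves, so Results[0] collects the low i16 half of every element and
// Results[1] the high half. The recursion repeats per half until each
// result holds ToWidth-bit pieces; joinVectorElements below reverses the
// process with vshuff.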
auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
                                              VectorType *ToType) const
  assert(ToType->getElementType()->isIntegerTy());
  unsigned ToWidth = ToType->getScalarSizeInBits();
  unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
  assert(Width <= ToWidth);

  unsigned Length = length(Inputs.front()->getType());
  unsigned NeedInputs = ToWidth / Width;
  if (Inputs.size() != NeedInputs) {
        Last, ConstantInt::get(Last->getType(), Width - 1), "asr");
    Inputs.resize(NeedInputs, Sign);

  while (Inputs.size() > 1) {
    for (int i = 0, e = Inputs.size(); i < e; i += 2) {
      Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
    Inputs.resize(Inputs.size() / 2);

  assert(Inputs.front()->getType() == ToType);
  return Inputs.front();
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> std::optional<int> {
  // Try SCEV first.
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  const SCEV *Scev1 = SE.getSCEV(Ptr1);
  const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
  if (const auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
    APInt V = Const->getAPInt();
    if (V.isSignedIntN(8 * sizeof(int)))
      return static_cast<int>(V.getSExtValue());
  }

  // Fall back to comparing GEPs manually. Temporary instructions created
  // along the way are erased when the builder goes out of scope.
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [this](Value *V) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
      if (Value *S = simplifyInstruction(I, Q))
        return S;
    }
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return std::nullopt;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return std::nullopt;
  if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
    return std::nullopt;

  Builder B(Gep0->getParent());
  int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return std::nullopt;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = getKnownBits(Idx0, Gep0);
  KnownBits Known1 = getKnownBits(Idx1, Gep1);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnes())
    return std::nullopt;

  // Compute the difference of the unknown and the known bit positions
  // separately, then add the two partial differences.
  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU))
    Diff0 = C->getSExtValue();
  else
    return std::nullopt;

  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK))
    Diff1 = C->getSExtValue();
  else
    return std::nullopt;

  return (Diff0 + Diff1) * Scale;

#undef CallBuilder
}
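// Illustration (assumed IR, not from this file): for two GEPs off the
// same base pointer with the same source element type,
//   %p0 = getelementptr i32, ptr %base, i32 %n
//   %p1 = getelementptr i32, ptr %base, i32 %m
// the difference is (n - m) * 4. When %n = add i32 %m, 4, the SCEV or
// CreateSub path folds this to 16; the KnownBits path splits each index
// into its known and unknown bits and succeeds when both partial
// differences fold to constants.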
auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
                                                 const Instruction *CtxI) const
    -> unsigned {
  return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
}

auto HexagonVectorCombine::getKnownBits(const Value *V,
                                        const Instruction *CtxI) const
    -> KnownBits {
  return computeKnownBits(V, DL, &AC, CtxI, &DT);
}
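// Both wrappers forward to the ValueTracking helpers, supplying this
// pass's cached analyses (DL, AC, DT) plus the given context instruction.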
auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
  if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
      In.isFenceLike() || In.mayReadOrWriteMemory()) {
    return false;
  }
  if (isa<CallBase>(In) || isa<AllocaInst>(In))
    return false;
  return true;
}
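// Illustration: a pure arithmetic instruction such as
//   %add = add i32 %x, %y
// passes these checks and may be cloned, while loads, stores, calls, and
// anything atomic, volatile, or fence-like is rejected.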
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &IgnoreInsts) const
    -> bool {
  auto getLocOrNone =
      [this](const Instruction &I) -> std::optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayHaveNonDefUseDependency(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(IgnoreInsts, &I))
      continue;
    // assume intrinsics can be ignored.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}
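// Illustration (assumed IR): moving a load of %p across
//   store i32 0, ptr %q
// is rejected unless AA proves %p and %q do not alias; llvm.assume calls
// and the instructions listed in IgnoreInsts are skipped during the scan.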
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}
auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
}
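// Example: Start = 2, Length = 4 over two 4-element inputs gives
//   SMask = {2, 3, 4, 5},
// i.e. the window {lo2, lo3, hi0, hi1} taken from the concatenation Lo:Hi.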
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
    return HVC.run();
  }
};

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}