44#define DEBUG_TYPE "legalizer"
57static std::pair<int, int>
63 unsigned NumParts =
Size / NarrowSize;
64 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
67 if (LeftoverSize == 0)
72 if (LeftoverSize % EltSize != 0)
81 return std::make_pair(NumParts, NumLeftover);
89 switch (Ty.getSizeInBits()) {
130 auto Step = LI.getAction(
MI, MRI);
131 switch (Step.Action) {
146 return bitcast(
MI, Step.TypeIdx, Step.NewType);
149 return lower(
MI, Step.TypeIdx, Step.NewType);
158 return LI.legalizeCustom(*
this,
MI, LocObserver) ?
Legalized
// LegalizerHelper::insertParts (only part of the body is visible here).
// The visible statements show two ways of producing DstReg: one that hands a
// combined register list to mergeMixedSubvectors(), and one that goes through
// extractGCDType() / buildLCMMergePieces() / buildWidenedRemergeToDst().
166void LegalizerHelper::insertParts(
Register DstReg,
// This path requires exactly one leftover register.
188 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
// Append the leftover register(s) to AllRegs and let mergeMixedSubvectors()
// define DstReg from the combined list.
190 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
191 return mergeMixedSubvectors(DstReg, AllRegs);
// Collect the GCDTy pieces of PartReg into GCDRegs.
197 extractGCDType(GCDRegs, GCDTy, PartReg);
// Merge the GCDTy pieces; the returned type is what the final remerge uses.
198 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
// Produce DstReg from the registers left in GCDRegs by the previous call.
199 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
204 LLT Ty = MRI.getType(
Reg);
// LegalizerHelper::mergeMixedSubvectors (only part of the body is visible
// here). Gathers registers into AllElts and emits a single merge-like
// instruction that defines DstReg.
212void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
// Every entry of PartRegs except the last one is expanded through
// appendVectorElts().
// NOTE(review): `PartRegs.size() - 1` would wrap around if PartRegs were
// empty and size() is unsigned -- presumably callers always pass at least one
// register; confirm.
215 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
216 appendVectorElts(AllElts, PartRegs[i]);
// Leftover is tested for being a non-vector before it is added.
// NOTE(review): the appendVectorElts() call below looks like the vector-typed
// branch of this test -- confirm against the full file.
219 if (!MRI.getType(Leftover).isVector())
222 appendVectorElts(AllElts, Leftover);
// Define DstReg from everything collected in AllElts.
224 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
230 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
232 const int StartIdx = Regs.
size();
233 const int NumResults =
MI.getNumOperands() - 1;
235 for (
int I = 0;
I != NumResults; ++
I)
236 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
241 LLT SrcTy = MRI.getType(SrcReg);
242 if (SrcTy == GCDTy) {
248 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
255 LLT SrcTy = MRI.getType(SrcReg);
257 extractGCDType(Parts, GCDTy, SrcReg);
// LegalizerHelper::buildLCMMergePieces (only part of the body is visible
// here). Groups the GCDTy registers in VRegs into NarrowTy registers, filling
// missing slots with a padding register chosen by PadStrategy, and finally
// replaces VRegs with the newly built registers.
261LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
263 unsigned PadStrategy) {
// Number of registers supplied in VRegs before any padding is added.
268 int NumOrigSrc = VRegs.
size();
// Fewer sources than NumParts * NumSubParts slots: a GCDTy padding register
// is needed. G_ZEXT pads with constant 0, G_ANYEXT pads with undef.
274 if (NumOrigSrc < NumParts * NumSubParts) {
275 if (PadStrategy == TargetOpcode::G_ZEXT)
276 PadReg =
MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
277 else if (PadStrategy == TargetOpcode::G_ANYEXT)
278 PadReg =
MIRBuilder.buildUndef(GCDTy).getReg(0);
// The only remaining strategy accepted is G_SEXT; its padding is an
// arithmetic shift right of the last source register by ShiftAmt.
280 assert(PadStrategy == TargetOpcode::G_SEXT);
285 PadReg =
MIRBuilder.buildAShr(GCDTy, VRegs.
back(), ShiftAmt).getReg(0);
// Build one Remerge entry per part.
301 for (
int I = 0;
I != NumParts; ++
I) {
// Starts true; set to false below where a register from VRegs is used.
302 bool AllMergePartsArePadding =
true;
// Fill the NumSubParts slots of this part.
305 for (
int J = 0; J != NumSubParts; ++J) {
// Flat index of slot J of part I.
306 int Idx =
I * NumSubParts + J;
// Slots at or past the original source count take the padding register.
307 if (Idx >= NumOrigSrc) {
308 SubMerge[J] = PadReg;
312 SubMerge[J] = VRegs[Idx];
315 AllMergePartsArePadding =
false;
// This part is all padding and no all-padding NarrowTy register has been
// recorded yet: for G_ANYEXT / G_ZEXT one is created directly in NarrowTy
// (undef / constant 0).
321 if (AllMergePartsArePadding && !AllPadReg) {
322 if (PadStrategy == TargetOpcode::G_ANYEXT)
323 AllPadReg =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
324 else if (PadStrategy == TargetOpcode::G_ZEXT)
325 AllPadReg =
MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
// Use the recorded all-padding register for this part.
334 Remerge[
I] = AllPadReg;
// With a single sub-part no merge is required; otherwise merge the slots into
// one NarrowTy register.
338 if (NumSubParts == 1)
339 Remerge[
I] = SubMerge[0];
341 Remerge[
I] =
MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
// Record the first all-padding part when AllPadReg is still unset at this
// point.
344 if (AllMergePartsArePadding && !AllPadReg)
345 AllPadReg = Remerge[
I];
// Hand the merged registers back to the caller through VRegs.
348 VRegs = std::move(Remerge);
// LegalizerHelper::buildWidenedRemergeToDst (only part of the body is visible
// here). Merges RemergeRegs and delivers the value to DstReg.
352void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
354 LLT DstTy = MRI.getType(DstReg);
// Types already agree: merge straight into DstReg.
359 if (DstTy == LCMTy) {
360 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
// Otherwise an LCMTy value is merged first.
364 auto Remerge =
MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
// DstReg is the first unmerge def; the other NumDefs - 1 defs are fresh
// DstTy virtual registers.
// NOTE(review): presumably these extra defs are unused placeholders --
// confirm against the full file.
373 UnmergeDefs[0] = DstReg;
374 for (
unsigned I = 1;
I != NumDefs; ++
I)
375 UnmergeDefs[
I] = MRI.createGenericVirtualRegister(DstTy);
378 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
386#define RTLIBCASE_INT(LibcallPrefix) \
390 return RTLIB::LibcallPrefix##32; \
392 return RTLIB::LibcallPrefix##64; \
394 return RTLIB::LibcallPrefix##128; \
396 llvm_unreachable("unexpected size"); \
400#define RTLIBCASE(LibcallPrefix) \
404 return RTLIB::LibcallPrefix##32; \
406 return RTLIB::LibcallPrefix##64; \
408 return RTLIB::LibcallPrefix##80; \
410 return RTLIB::LibcallPrefix##128; \
412 llvm_unreachable("unexpected size"); \
417 case TargetOpcode::G_LROUND:
419 case TargetOpcode::G_LLROUND:
421 case TargetOpcode::G_MUL:
423 case TargetOpcode::G_SDIV:
425 case TargetOpcode::G_UDIV:
427 case TargetOpcode::G_SREM:
429 case TargetOpcode::G_UREM:
431 case TargetOpcode::G_CTLZ_ZERO_POISON:
433 case TargetOpcode::G_FADD:
435 case TargetOpcode::G_FSUB:
437 case TargetOpcode::G_FMUL:
439 case TargetOpcode::G_FDIV:
441 case TargetOpcode::G_FEXP:
443 case TargetOpcode::G_FEXP2:
445 case TargetOpcode::G_FEXP10:
447 case TargetOpcode::G_FREM:
449 case TargetOpcode::G_FPOW:
451 case TargetOpcode::G_FPOWI:
453 case TargetOpcode::G_FMA:
455 case TargetOpcode::G_FSIN:
457 case TargetOpcode::G_FCOS:
459 case TargetOpcode::G_FTAN:
461 case TargetOpcode::G_FASIN:
463 case TargetOpcode::G_FACOS:
465 case TargetOpcode::G_FATAN:
467 case TargetOpcode::G_FATAN2:
469 case TargetOpcode::G_FSINH:
471 case TargetOpcode::G_FCOSH:
473 case TargetOpcode::G_FTANH:
475 case TargetOpcode::G_FSINCOS:
477 case TargetOpcode::G_FMODF:
479 case TargetOpcode::G_FLOG10:
481 case TargetOpcode::G_FLOG:
483 case TargetOpcode::G_FLOG2:
485 case TargetOpcode::G_FLDEXP:
487 case TargetOpcode::G_FCEIL:
489 case TargetOpcode::G_FFLOOR:
491 case TargetOpcode::G_FMINNUM:
493 case TargetOpcode::G_FMAXNUM:
495 case TargetOpcode::G_FMINIMUMNUM:
497 case TargetOpcode::G_FMAXIMUMNUM:
499 case TargetOpcode::G_FSQRT:
501 case TargetOpcode::G_FRINT:
503 case TargetOpcode::G_FNEARBYINT:
505 case TargetOpcode::G_INTRINSIC_TRUNC:
507 case TargetOpcode::G_INTRINSIC_ROUND:
509 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
511 case TargetOpcode::G_INTRINSIC_LRINT:
513 case TargetOpcode::G_INTRINSIC_LLRINT:
533 AttributeList CallerAttrs =
F.getAttributes();
534 if (AttrBuilder(
F.getContext(), CallerAttrs.getRetAttrs())
535 .removeAttribute(Attribute::NoAlias)
536 .removeAttribute(Attribute::NonNull)
541 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
542 CallerAttrs.hasRetAttr(Attribute::SExt))
553 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
560 if (!VReg.
isVirtual() || VReg !=
Next->getOperand(1).getReg())
568 if (Ret ==
MBB.instr_end() || !Ret->isReturn())
571 if (Ret->getNumImplicitOperands() != 1)
574 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
591 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
596 Info.OrigRet = Result;
599 (Result.Ty->isVoidTy() ||
600 Result.Ty ==
MIRBuilder.getMF().getFunction().getReturnType()) &&
608 if (
MI && Info.LoweredTailCall) {
609 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
619 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
620 "Expected instr following MI to be return or debug inst?");
623 Next->eraseFromParent();
624 }
while (
MI->getNextNode());
639 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(
Libcall);
640 if (LibcallImpl == RTLIB::Unsupported)
644 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
658 Args.push_back({MO.getReg(), OpType, 0});
677 unsigned AddrSpace =
DL.getAllocaAddrSpace();
695 if (LibcallResult != LegalizeResult::Legalized)
703 MIRBuilder.
buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
704 MIRBuilder.
buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
705 MI.eraseFromParent();
720 LLT DstTy = MRI.getType(DstFrac);
725 unsigned AddrSpace =
DL.getAllocaAddrSpace();
726 MachinePointerInfo PtrInfo;
735 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
738 if (LibcallResult != LegalizeResult::Legalized)
744 MIRBuilder.
buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
745 MI.eraseFromParent();
756 case TargetOpcode::G_FPEXT:
758 case TargetOpcode::G_FPTRUNC:
760 case TargetOpcode::G_FPTOSI:
762 case TargetOpcode::G_FPTOUI:
764 case TargetOpcode::G_SITOFP:
766 case TargetOpcode::G_UITOFP:
776 if (FromType->isIntegerTy()) {
777 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
778 Arg.
Flags[0].setSExt();
780 Arg.
Flags[0].setZExt();
791 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
795 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
799 LLT OpLLT = MRI.getType(Reg);
800 Type *OpTy =
nullptr;
805 Args.push_back({Reg, OpTy, 0});
808 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
809 RTLIB::Libcall RTLibcall;
810 unsigned Opc =
MI.getOpcode();
812 case TargetOpcode::G_BZERO:
813 RTLibcall = RTLIB::BZERO;
815 case TargetOpcode::G_MEMCPY:
816 RTLibcall = RTLIB::MEMCPY;
817 Args[0].Flags[0].setReturned();
819 case TargetOpcode::G_MEMMOVE:
820 RTLibcall = RTLIB::MEMMOVE;
821 Args[0].Flags[0].setReturned();
823 case TargetOpcode::G_MEMSET:
824 RTLibcall = RTLIB::MEMSET;
825 Args[0].Flags[0].setReturned();
834 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
837 if (RTLibcallImpl == RTLIB::Unsupported) {
844 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
851 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
858 if (Info.LoweredTailCall) {
859 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
869 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
870 "Expected instr following MI to be return or debug inst?");
873 Next->eraseFromParent();
874 }
while (
MI.getNextNode());
884 unsigned Opc =
MI.getOpcode();
886 auto &MMO = AtomicMI.getMMO();
887 auto Ordering = MMO.getMergedOrdering();
888 LLT MemType = MMO.getMemoryType();
891 return RTLIB::UNKNOWN_LIBCALL;
893#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
895 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
897 case TargetOpcode::G_ATOMIC_CMPXCHG:
898 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
899 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
900 return getOutlineAtomicHelper(LC, Ordering, MemSize);
902 case TargetOpcode::G_ATOMICRMW_XCHG: {
903 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
904 return getOutlineAtomicHelper(LC, Ordering, MemSize);
906 case TargetOpcode::G_ATOMICRMW_ADD:
907 case TargetOpcode::G_ATOMICRMW_SUB: {
908 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
909 return getOutlineAtomicHelper(LC, Ordering, MemSize);
911 case TargetOpcode::G_ATOMICRMW_AND: {
912 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
913 return getOutlineAtomicHelper(LC, Ordering, MemSize);
915 case TargetOpcode::G_ATOMICRMW_OR: {
916 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
917 return getOutlineAtomicHelper(LC, Ordering, MemSize);
919 case TargetOpcode::G_ATOMICRMW_XOR: {
920 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
921 return getOutlineAtomicHelper(LC, Ordering, MemSize);
924 return RTLIB::UNKNOWN_LIBCALL;
937 unsigned Opc =
MI.getOpcode();
939 case TargetOpcode::G_ATOMIC_CMPXCHG:
940 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
943 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
944 MI.getFirst4RegLLTs();
947 if (
Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
948 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
949 NewLLT) =
MI.getFirst5RegLLTs();
959 case TargetOpcode::G_ATOMICRMW_XCHG:
960 case TargetOpcode::G_ATOMICRMW_ADD:
961 case TargetOpcode::G_ATOMICRMW_SUB:
962 case TargetOpcode::G_ATOMICRMW_AND:
963 case TargetOpcode::G_ATOMICRMW_OR:
964 case TargetOpcode::G_ATOMICRMW_XOR: {
965 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
968 if (
Opc == TargetOpcode::G_ATOMICRMW_AND)
972 else if (
Opc == TargetOpcode::G_ATOMICRMW_SUB)
987 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
989 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
992 if (RTLibcallImpl == RTLIB::Unsupported) {
999 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1013static RTLIB::Libcall
1015 RTLIB::Libcall RTLibcall;
1016 switch (
MI.getOpcode()) {
1017 case TargetOpcode::G_GET_FPENV:
1018 RTLibcall = RTLIB::FEGETENV;
1020 case TargetOpcode::G_SET_FPENV:
1021 case TargetOpcode::G_RESET_FPENV:
1022 RTLibcall = RTLIB::FESETENV;
1024 case TargetOpcode::G_GET_FPMODE:
1025 RTLibcall = RTLIB::FEGETMODE;
1027 case TargetOpcode::G_SET_FPMODE:
1028 case TargetOpcode::G_RESET_FPMODE:
1029 RTLibcall = RTLIB::FESETMODE;
1061 LLT StateTy = MRI.getType(Dst);
1064 MachinePointerInfo TempPtrInfo;
1068 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1073 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1081 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1099 LLT StateTy = MRI.getType(Src);
1102 MachinePointerInfo TempPtrInfo;
1111 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1116 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1117 LocObserver,
nullptr);
1123static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1125#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1129 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1131 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1133 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1135 llvm_unreachable("unexpected size"); \
1166 LLT OpLLT = MRI.getType(
Cmp->getLHSReg());
1169 OpLLT != MRI.getType(
Cmp->getRHSReg()))
1176 LLT DstTy = MRI.getType(DstReg);
1177 const auto Cond =
Cmp->getCond();
1182 const auto BuildLibcall = [&](
const RTLIB::Libcall
Libcall,
1187 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1191 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1198 .buildICmp(ICmpPred, Res, Temp,
MIRBuilder.buildConstant(TempLLT, 0))
1204 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1206 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1219 const auto [OeqLibcall, OeqPred] =
1221 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1223 const auto [UnoLibcall, UnoPred] =
1225 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1240 const auto [OeqLibcall, OeqPred] =
1245 const auto [UnoLibcall, UnoPred] =
1250 if (NotOeq && NotUno)
1269 const auto [InversedLibcall, InversedPred] =
1271 if (!BuildLibcall(InversedLibcall,
1296 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1298 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1301 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1307 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &
MI);
1312 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
1314 switch (
MI.getOpcode()) {
1317 case TargetOpcode::G_MUL:
1318 case TargetOpcode::G_SDIV:
1319 case TargetOpcode::G_UDIV:
1320 case TargetOpcode::G_SREM:
1321 case TargetOpcode::G_UREM:
1322 case TargetOpcode::G_CTLZ_ZERO_POISON: {
1323 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1331 case TargetOpcode::G_FADD:
1332 case TargetOpcode::G_FSUB:
1333 case TargetOpcode::G_FMUL:
1334 case TargetOpcode::G_FDIV:
1335 case TargetOpcode::G_FMA:
1336 case TargetOpcode::G_FPOW:
1337 case TargetOpcode::G_FREM:
1338 case TargetOpcode::G_FCOS:
1339 case TargetOpcode::G_FSIN:
1340 case TargetOpcode::G_FTAN:
1341 case TargetOpcode::G_FACOS:
1342 case TargetOpcode::G_FASIN:
1343 case TargetOpcode::G_FATAN:
1344 case TargetOpcode::G_FATAN2:
1345 case TargetOpcode::G_FCOSH:
1346 case TargetOpcode::G_FSINH:
1347 case TargetOpcode::G_FTANH:
1348 case TargetOpcode::G_FLOG10:
1349 case TargetOpcode::G_FLOG:
1350 case TargetOpcode::G_FLOG2:
1351 case TargetOpcode::G_FEXP:
1352 case TargetOpcode::G_FEXP2:
1353 case TargetOpcode::G_FEXP10:
1354 case TargetOpcode::G_FCEIL:
1355 case TargetOpcode::G_FFLOOR:
1356 case TargetOpcode::G_FMINNUM:
1357 case TargetOpcode::G_FMAXNUM:
1358 case TargetOpcode::G_FMINIMUMNUM:
1359 case TargetOpcode::G_FMAXIMUMNUM:
1360 case TargetOpcode::G_FSQRT:
1361 case TargetOpcode::G_FRINT:
1362 case TargetOpcode::G_FNEARBYINT:
1363 case TargetOpcode::G_INTRINSIC_TRUNC:
1364 case TargetOpcode::G_INTRINSIC_ROUND:
1365 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1366 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1370 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1378 case TargetOpcode::G_FSINCOS: {
1379 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1383 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1388 case TargetOpcode::G_FMODF: {
1389 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1393 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1398 case TargetOpcode::G_LROUND:
1399 case TargetOpcode::G_LLROUND:
1400 case TargetOpcode::G_INTRINSIC_LRINT:
1401 case TargetOpcode::G_INTRINSIC_LLRINT: {
1402 LLT LLTy = MRI.getType(
MI.getOperand(1).getReg());
1406 Ctx, MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits());
1408 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1414 {{
MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &
MI);
1417 MI.eraseFromParent();
1420 case TargetOpcode::G_FPOWI:
1421 case TargetOpcode::G_FLDEXP: {
1422 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1426 Ctx, MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
1428 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1433 {
MI.getOperand(1).getReg(), HLTy, 0},
1434 {
MI.getOperand(2).getReg(), ITy, 1}};
1435 Args[1].Flags[0].setSExt();
1437 Libcall, {
MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &
MI);
1442 case TargetOpcode::G_FPEXT:
1443 case TargetOpcode::G_FPTRUNC: {
1446 if (!FromTy || !ToTy)
1453 case TargetOpcode::G_FCMP: {
1457 MI.eraseFromParent();
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1465 unsigned ToSize = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1469 FromTy, LocObserver);
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1481 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1488 case TargetOpcode::G_ATOMICRMW_XCHG:
1489 case TargetOpcode::G_ATOMICRMW_ADD:
1490 case TargetOpcode::G_ATOMICRMW_SUB:
1491 case TargetOpcode::G_ATOMICRMW_AND:
1492 case TargetOpcode::G_ATOMICRMW_OR:
1493 case TargetOpcode::G_ATOMICRMW_XOR:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1501 case TargetOpcode::G_BZERO:
1502 case TargetOpcode::G_MEMCPY:
1503 case TargetOpcode::G_MEMMOVE:
1504 case TargetOpcode::G_MEMSET: {
1509 MI.eraseFromParent();
1512 case TargetOpcode::G_GET_FPENV:
1513 case TargetOpcode::G_GET_FPMODE: {
1519 case TargetOpcode::G_SET_FPENV:
1520 case TargetOpcode::G_SET_FPMODE: {
1526 case TargetOpcode::G_RESET_FPENV:
1527 case TargetOpcode::G_RESET_FPMODE: {
1535 MI.eraseFromParent();
1542 uint64_t SizeOp0 = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1545 switch (
MI.getOpcode()) {
1548 case TargetOpcode::G_IMPLICIT_DEF: {
1550 LLT DstTy = MRI.getType(DstReg);
1558 if (SizeOp0 % NarrowSize != 0) {
1563 MI.eraseFromParent();
1567 int NumParts = SizeOp0 / NarrowSize;
1570 for (
int i = 0; i < NumParts; ++i)
1574 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1576 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1577 MI.eraseFromParent();
1580 case TargetOpcode::G_CONSTANT: {
1581 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1582 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1583 unsigned TotalSize = Ty.getSizeInBits();
1585 int NumParts = TotalSize / NarrowSize;
1588 for (
int I = 0;
I != NumParts; ++
I) {
1589 unsigned Offset =
I * NarrowSize;
1596 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1598 if (LeftoverBits != 0) {
1602 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1606 insertParts(
MI.getOperand(0).getReg(),
1607 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1609 MI.eraseFromParent();
1612 case TargetOpcode::G_SEXT:
1613 case TargetOpcode::G_ZEXT:
1614 case TargetOpcode::G_ANYEXT:
1616 case TargetOpcode::G_TRUNC: {
1620 uint64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1622 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1626 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
1627 MIRBuilder.buildCopy(
MI.getOperand(0), Unmerge.getReg(0));
1628 MI.eraseFromParent();
1631 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1632 case TargetOpcode::G_FREEZE: {
1636 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1641 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1).getReg());
1643 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1645 MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1649 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), Parts);
1650 MI.eraseFromParent();
1653 case TargetOpcode::G_ADD:
1654 case TargetOpcode::G_SUB:
1655 case TargetOpcode::G_SADDO:
1656 case TargetOpcode::G_SSUBO:
1657 case TargetOpcode::G_SADDE:
1658 case TargetOpcode::G_SSUBE:
1659 case TargetOpcode::G_UADDO:
1660 case TargetOpcode::G_USUBO:
1661 case TargetOpcode::G_UADDE:
1662 case TargetOpcode::G_USUBE:
1664 case TargetOpcode::G_MUL:
1665 case TargetOpcode::G_UMULH:
1667 case TargetOpcode::G_EXTRACT:
1669 case TargetOpcode::G_INSERT:
1671 case TargetOpcode::G_LOAD: {
1673 Register DstReg = LoadMI.getDstReg();
1674 LLT DstTy = MRI.getType(DstReg);
1678 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1679 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1680 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1682 LoadMI.eraseFromParent();
1688 case TargetOpcode::G_ZEXTLOAD:
1689 case TargetOpcode::G_SEXTLOAD:
1690 case TargetOpcode::G_FPEXTLOAD: {
1692 Register DstReg = LoadMI.getDstReg();
1693 Register PtrReg = LoadMI.getPointerReg();
1695 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1696 auto &MMO = LoadMI.getMMO();
1699 if (MemSize == NarrowSize) {
1701 }
else if (MemSize < NarrowSize) {
1702 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1703 }
else if (MemSize > NarrowSize) {
1715 LoadMI.eraseFromParent();
1718 case TargetOpcode::G_STORE: {
1721 Register SrcReg = StoreMI.getValueReg();
1722 LLT SrcTy = MRI.getType(SrcReg);
1723 if (SrcTy.isVector())
1726 int NumParts = SizeOp0 / NarrowSize;
1728 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1729 if (SrcTy.isVector() && LeftoverBits != 0)
1732 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1733 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1735 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1736 StoreMI.eraseFromParent();
1742 case TargetOpcode::G_FPTRUNCSTORE: {
1744 Register SrcReg = StoreMI.getValueReg();
1745 Register PtrReg = StoreMI.getPointerReg();
1747 auto &MMO = StoreMI.getMMO();
1749 if (MemSize > NarrowSize) {
1753 auto TmpReg =
MIRBuilder.buildFPTrunc(NarrowTy, SrcReg);
1754 if (MemSize == NarrowSize) {
1756 }
else if (MemSize < NarrowSize) {
1757 MIRBuilder.buildStoreInstr(TargetOpcode::G_FPTRUNCSTORE, TmpReg, PtrReg,
1761 StoreMI.eraseFromParent();
1764 case TargetOpcode::G_SELECT:
1766 case TargetOpcode::G_AND:
1767 case TargetOpcode::G_OR:
1768 case TargetOpcode::G_XOR: {
1780 case TargetOpcode::G_SHL:
1781 case TargetOpcode::G_LSHR:
1782 case TargetOpcode::G_ASHR:
1784 case TargetOpcode::G_CTLZ:
1785 case TargetOpcode::G_CTLZ_ZERO_POISON:
1786 case TargetOpcode::G_CTTZ:
1787 case TargetOpcode::G_CTTZ_ZERO_POISON:
1788 case TargetOpcode::G_CTLS:
1789 case TargetOpcode::G_CTPOP:
1791 switch (
MI.getOpcode()) {
1792 case TargetOpcode::G_CTLZ:
1793 case TargetOpcode::G_CTLZ_ZERO_POISON:
1795 case TargetOpcode::G_CTTZ:
1796 case TargetOpcode::G_CTTZ_ZERO_POISON:
1798 case TargetOpcode::G_CTPOP:
1800 case TargetOpcode::G_CTLS:
1810 case TargetOpcode::G_INTTOPTR:
1818 case TargetOpcode::G_PTRTOINT:
1826 case TargetOpcode::G_PHI: {
1829 if (SizeOp0 % NarrowSize != 0)
1832 unsigned NumParts = SizeOp0 / NarrowSize;
1836 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1844 for (
unsigned i = 0; i < NumParts; ++i) {
1845 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1847 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1848 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1849 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1852 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
1854 MI.eraseFromParent();
1857 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1858 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1862 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1868 case TargetOpcode::G_ICMP: {
1870 LLT SrcTy = MRI.getType(LHS);
1876 if (!
extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1882 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1883 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1889 LLT ResTy = MRI.getType(Dst);
1894 auto Zero =
MIRBuilder.buildConstant(NarrowTy, 0);
1896 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1897 auto LHS = std::get<0>(LHSAndRHS);
1898 auto RHS = std::get<1>(LHSAndRHS);
1899 auto Xor =
MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1906 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1907 auto LHS = std::get<0>(LHSAndRHS);
1908 auto RHS = std::get<1>(LHSAndRHS);
1909 auto Xor =
MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1910 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1911 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1912 TargetOpcode::G_ZEXT);
1919 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1920 auto Or =
MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1921 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1926 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1930 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[
I],
1942 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[
I],
1945 LHSPartRegs[
I], RHSPartRegs[
I]);
1946 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1952 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1961 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1965 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[
I],
1966 RHSLeftoverRegs[
I]);
1968 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[
I],
1969 RHSLeftoverRegs[
I]);
1972 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1973 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1979 MI.eraseFromParent();
1982 case TargetOpcode::G_FCMP:
1991 case TargetOpcode::G_SEXT_INREG: {
1995 int64_t SizeInBits =
MI.getOperand(2).getImm();
2004 auto TruncMIB =
MIRBuilder.buildTrunc(NarrowTy, MO1);
2005 MO1.
setReg(TruncMIB.getReg(0));
2008 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
2020 if (SizeOp0 % NarrowSize != 0)
2022 int NumParts = SizeOp0 / NarrowSize;
2030 for (
int i = 0; i < NumParts; ++i) {
2031 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2046 for (
int i = 0; i < NumParts; ++i) {
2049 PartialExtensionReg = DstRegs.
back();
2051 assert(PartialExtensionReg &&
2052 "Expected to visit partial extension before full");
2053 if (FullExtensionReg) {
2058 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2060 FullExtensionReg = DstRegs.
back();
2065 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2068 PartialExtensionReg = DstRegs.
back();
2074 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2075 MI.eraseFromParent();
2078 case TargetOpcode::G_BSWAP:
2079 case TargetOpcode::G_BITREVERSE: {
2080 if (SizeOp0 % NarrowSize != 0)
2085 unsigned NumParts = SizeOp0 / NarrowSize;
2086 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2089 for (
unsigned i = 0; i < NumParts; ++i) {
2090 auto DstPart =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
2091 {SrcRegs[NumParts - 1 - i]});
2095 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
2098 MI.eraseFromParent();
2101 case TargetOpcode::G_PTR_ADD:
2102 case TargetOpcode::G_PTRMASK: {
2110 case TargetOpcode::G_FPTOUI:
2111 case TargetOpcode::G_FPTOSI:
2112 case TargetOpcode::G_FPTOUI_SAT:
2113 case TargetOpcode::G_FPTOSI_SAT:
2115 case TargetOpcode::G_FPEXT:
2122 case TargetOpcode::G_FLDEXP:
2123 case TargetOpcode::G_STRICT_FLDEXP:
2125 case TargetOpcode::G_VSCALE: {
2127 LLT Ty = MRI.getType(Dst);
2131 auto VScaleBase =
MIRBuilder.buildVScale(NarrowTy, One);
2132 auto ZExt =
MIRBuilder.buildZExt(Ty, VScaleBase);
2133 auto C =
MIRBuilder.buildConstant(Ty, *
MI.getOperand(1).getCImm());
2136 MI.eraseFromParent();
2143 LLT Ty = MRI.getType(Val);
2149 if (Ty.isPointer()) {
2150 if (
DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2152 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2158 if (Ty.isPointerVector())
2159 NewVal =
MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2160 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2164 unsigned OpIdx,
unsigned ExtOpcode) {
2166 auto ExtB =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2167 MO.
setReg(ExtB.getReg(0));
2173 auto ExtB =
MIRBuilder.buildTrunc(NarrowTy, MO);
2174 MO.
setReg(ExtB.getReg(0));
2178 unsigned OpIdx,
unsigned TruncOpcode) {
2180 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2182 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2187 unsigned OpIdx,
unsigned ExtOpcode) {
2189 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2191 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2200 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2202 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2208 MO.
setReg(
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2218 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
// LegalizerHelper::widenScalarMergeValues (only part of the body is visible
// here; the return type line is not shown).
2225LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2230 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
// Vector destinations get separate treatment (the branch body is not visible).
2231 if (DstTy.isVector())
2236 const int SrcSize = SrcTy.getSizeInBits();
// Number of WideSize pieces needed to cover DstSize, rounded up.
2238 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
// NumSrc excludes one operand from the total operand count; PartSize is the
// destination size divided evenly between the sources.
2240 unsigned NumOps =
MI.getNumOperands();
2241 unsigned NumSrc =
MI.getNumOperands() - 1;
2242 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
// Case: the wide type is at least as large as the destination.
2244 if (WideSize >= DstSize) {
// Walk operands 2 .. NumOps - 1; the operand at index I is associated with
// offset (I - 1) * PartSize.
2248 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
2249 const unsigned Offset = (
I - 1) * PartSize;
2262 ResultReg = NextResult;
// Final step depends on whether the wide type is strictly larger than the
// destination, or the destination is a pointer.
2265 if (WideSize > DstSize)
2267 else if (DstTy.isPointer())
2270 MI.eraseFromParent();
// Remaining code: work in pieces whose size is gcd(SrcSize, WideSize).
2295 const int GCD = std::gcd(SrcSize, WideSize);
// When the GCD equals the source size the sources are handled differently
// from the unmerge path below (branch body not visible).
2305 if (GCD == SrcSize) {
2308 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
// Visit every operand of the unmerge except the last one.
2309 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
// NOTE(review): Unmerges.size() is a register count, but it is compared with
// (and grown towards) NumMerge * WideSize, whereas the loop further down
// consumes NumMerge * PartsPerGCD entries -- confirm this bound is intended.
2315 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2317 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
// Number of GCD-sized pieces that make up one WideSize value.
2321 const int PartsPerGCD = WideSize / GCD;
// Each iteration merges the next PartsPerGCD entries of Slicer into one
// WideTy value, then drops them from the front of Slicer.
2325 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2327 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
// Either merge NewMergeRegs directly into DstReg, or merge into WideDstTy and
// truncate that result into DstReg.
2334 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2336 auto FinalMerge =
MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2337 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2340 MI.eraseFromParent();
2345LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2350 int NumDst =
MI.getNumOperands() - 1;
2351 Register SrcReg =
MI.getOperand(NumDst).getReg();
2352 LLT SrcTy = MRI.getType(SrcReg);
2356 Register Dst0Reg =
MI.getOperand(0).getReg();
2357 LLT DstTy = MRI.getType(Dst0Reg);
2366 dbgs() <<
"Not casting non-integral address space integer\n");
2371 SrcReg =
MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2379 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2387 for (
int I = 1;
I != NumDst; ++
I) {
2388 auto ShiftAmt =
MIRBuilder.buildConstant(SrcTy, DstSize *
I);
2389 auto Shr =
MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2393 MI.eraseFromParent();
2404 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2408 WideSrc =
MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2411 auto Unmerge =
MIRBuilder.buildUnmerge(WideTy, WideSrc);
2429 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2434 if (PartsPerRemerge == 1) {
2437 for (
int I = 0;
I != NumUnmerge; ++
I) {
2438 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2440 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2441 int Idx =
I * PartsPerUnmerge + J;
2443 MIB.addDef(
MI.getOperand(Idx).getReg());
2446 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2450 MIB.addUse(Unmerge.getReg(
I));
2453 SmallVector<Register, 16> Parts;
2454 for (
int J = 0; J != NumUnmerge; ++J)
2455 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2458 for (
int I = 0;
I != NumDst; ++
I) {
2459 for (
int J = 0; J < PartsPerRemerge; ++J) {
2460 const int Idx =
I * PartsPerRemerge + J;
2464 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(
I).getReg(), RemergeParts);
2465 RemergeParts.
clear();
2469 MI.eraseFromParent();
2474LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2476 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2477 unsigned Offset =
MI.getOperand(2).getImm();
2480 if (SrcTy.
isVector() || DstTy.isVector())
2492 Src =
MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2496 if (DstTy.isPointer())
2503 MI.eraseFromParent();
2508 LLT ShiftTy = SrcTy;
2517 MI.eraseFromParent();
2548LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2550 if (TypeIdx != 0 || WideTy.
isVector())
2560LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2564 std::optional<Register> CarryIn;
2565 switch (
MI.getOpcode()) {
2568 case TargetOpcode::G_SADDO:
2569 Opcode = TargetOpcode::G_ADD;
2570 ExtOpcode = TargetOpcode::G_SEXT;
2572 case TargetOpcode::G_SSUBO:
2573 Opcode = TargetOpcode::G_SUB;
2574 ExtOpcode = TargetOpcode::G_SEXT;
2576 case TargetOpcode::G_UADDO:
2577 Opcode = TargetOpcode::G_ADD;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2580 case TargetOpcode::G_USUBO:
2581 Opcode = TargetOpcode::G_SUB;
2582 ExtOpcode = TargetOpcode::G_ZEXT;
2584 case TargetOpcode::G_SADDE:
2585 Opcode = TargetOpcode::G_UADDE;
2586 ExtOpcode = TargetOpcode::G_SEXT;
2587 CarryIn =
MI.getOperand(4).getReg();
2589 case TargetOpcode::G_SSUBE:
2590 Opcode = TargetOpcode::G_USUBE;
2591 ExtOpcode = TargetOpcode::G_SEXT;
2592 CarryIn =
MI.getOperand(4).getReg();
2594 case TargetOpcode::G_UADDE:
2595 Opcode = TargetOpcode::G_UADDE;
2596 ExtOpcode = TargetOpcode::G_ZEXT;
2597 CarryIn =
MI.getOperand(4).getReg();
2599 case TargetOpcode::G_USUBE:
2600 Opcode = TargetOpcode::G_USUBE;
2601 ExtOpcode = TargetOpcode::G_ZEXT;
2602 CarryIn =
MI.getOperand(4).getReg();
2618 auto LHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(2)});
2619 auto RHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(3)});
2623 LLT CarryOutTy = MRI.getType(
MI.getOperand(1).getReg());
2625 .buildInstr(Opcode, {WideTy, CarryOutTy},
2626 {LHSExt, RHSExt, *CarryIn})
2629 NewOp =
MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).
getReg(0);
2631 LLT OrigTy = MRI.getType(
MI.getOperand(0).getReg());
2632 auto TruncOp =
MIRBuilder.buildTrunc(OrigTy, NewOp);
2633 auto ExtOp =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2638 MI.eraseFromParent();
2643LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2645 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2646 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2647 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2648 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2649 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2662 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2669 auto ShiftK =
MIRBuilder.buildConstant(WideTy, SHLAmount);
2673 auto WideInst =
MIRBuilder.buildInstr(
MI.getOpcode(), {WideTy},
2674 {ShiftL, ShiftR},
MI.getFlags());
2679 :
MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2682 MI.eraseFromParent();
2687LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2696 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2698 LLT SrcTy = MRI.getType(
LHS);
2699 LLT OverflowTy = MRI.getType(OriginalOverflow);
2706 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2707 auto LeftOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
LHS});
2708 auto RightOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
RHS});
2715 WideMulCanOverflow ?
MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2717 MachineInstrBuilder Mulo;
2718 if (WideMulCanOverflow)
2719 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2720 {LeftOperand, RightOperand});
2722 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2727 MachineInstrBuilder ExtResult;
2734 ExtResult =
MIRBuilder.buildSExtInReg(WideTy,
Mul, SrcBitWidth);
2738 ExtResult =
MIRBuilder.buildZExtInReg(WideTy,
Mul, SrcBitWidth);
2741 if (WideMulCanOverflow) {
2749 MI.eraseFromParent();
2755 unsigned Opcode =
MI.getOpcode();
2759 case TargetOpcode::G_ATOMICRMW_XCHG:
2760 case TargetOpcode::G_ATOMICRMW_ADD:
2761 case TargetOpcode::G_ATOMICRMW_SUB:
2762 case TargetOpcode::G_ATOMICRMW_AND:
2763 case TargetOpcode::G_ATOMICRMW_OR:
2764 case TargetOpcode::G_ATOMICRMW_XOR:
2765 case TargetOpcode::G_ATOMICRMW_MIN:
2766 case TargetOpcode::G_ATOMICRMW_MAX:
2767 case TargetOpcode::G_ATOMICRMW_UMIN:
2768 case TargetOpcode::G_ATOMICRMW_UMAX:
2769 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2775 case TargetOpcode::G_ATOMIC_CMPXCHG:
2776 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2783 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2793 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2798 case TargetOpcode::G_EXTRACT:
2799 return widenScalarExtract(
MI, TypeIdx, WideTy);
2800 case TargetOpcode::G_INSERT:
2801 return widenScalarInsert(
MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_MERGE_VALUES:
2803 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2804 case TargetOpcode::G_UNMERGE_VALUES:
2805 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2806 case TargetOpcode::G_SADDO:
2807 case TargetOpcode::G_SSUBO:
2808 case TargetOpcode::G_UADDO:
2809 case TargetOpcode::G_USUBO:
2810 case TargetOpcode::G_SADDE:
2811 case TargetOpcode::G_SSUBE:
2812 case TargetOpcode::G_UADDE:
2813 case TargetOpcode::G_USUBE:
2814 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2815 case TargetOpcode::G_UMULO:
2816 case TargetOpcode::G_SMULO:
2817 return widenScalarMulo(
MI, TypeIdx, WideTy);
2818 case TargetOpcode::G_SADDSAT:
2819 case TargetOpcode::G_SSUBSAT:
2820 case TargetOpcode::G_SSHLSAT:
2821 case TargetOpcode::G_UADDSAT:
2822 case TargetOpcode::G_USUBSAT:
2823 case TargetOpcode::G_USHLSAT:
2824 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2825 case TargetOpcode::G_CTTZ:
2826 case TargetOpcode::G_CTTZ_ZERO_POISON:
2827 case TargetOpcode::G_CTLZ:
2828 case TargetOpcode::G_CTLZ_ZERO_POISON:
2829 case TargetOpcode::G_CTLS:
2830 case TargetOpcode::G_CTPOP: {
2843 case TargetOpcode::G_CTTZ:
2844 case TargetOpcode::G_CTTZ_ZERO_POISON:
2845 case TargetOpcode::G_CTLZ_ZERO_POISON:
2846 ExtOpc = TargetOpcode::G_ANYEXT;
2848 case TargetOpcode::G_CTLS:
2849 ExtOpc = TargetOpcode::G_SEXT;
2852 ExtOpc = TargetOpcode::G_ZEXT;
2855 auto MIBSrc =
MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2856 LLT CurTy = MRI.getType(SrcReg);
2857 unsigned NewOpc = Opcode;
2858 if (NewOpc == TargetOpcode::G_CTTZ) {
2865 WideTy, MIBSrc,
MIRBuilder.buildConstant(WideTy, TopBit));
2867 NewOpc = TargetOpcode::G_CTTZ_ZERO_POISON;
2873 if (Opcode == TargetOpcode::G_CTLZ_ZERO_POISON) {
2883 auto MIBNewOp =
MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2885 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2890 WideTy, MIBNewOp,
MIRBuilder.buildConstant(WideTy, SizeDiff),
2891 Opcode == TargetOpcode::G_CTLZ
2896 MIRBuilder.buildZExtOrTrunc(
MI.getOperand(0), MIBNewOp);
2897 MI.eraseFromParent();
2900 case TargetOpcode::G_BSWAP: {
2904 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2905 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2906 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2909 MI.getOperand(0).setReg(DstExt);
2913 LLT Ty = MRI.getType(DstReg);
2915 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2916 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2922 case TargetOpcode::G_BITREVERSE: {
2926 LLT Ty = MRI.getType(DstReg);
2929 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2931 MI.getOperand(0).setReg(DstExt);
2934 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, DiffBits);
2935 auto Shift =
MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2940 case TargetOpcode::G_FREEZE:
2941 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2948 case TargetOpcode::G_ABS:
2955 case TargetOpcode::G_ADD:
2956 case TargetOpcode::G_AND:
2957 case TargetOpcode::G_MUL:
2958 case TargetOpcode::G_OR:
2959 case TargetOpcode::G_XOR:
2960 case TargetOpcode::G_SUB:
2961 case TargetOpcode::G_SHUFFLE_VECTOR:
2972 case TargetOpcode::G_SBFX:
2973 case TargetOpcode::G_UBFX:
2987 case TargetOpcode::G_SHL:
3003 case TargetOpcode::G_ROTR:
3004 case TargetOpcode::G_ROTL:
3013 case TargetOpcode::G_SDIV:
3014 case TargetOpcode::G_SREM:
3015 case TargetOpcode::G_SMIN:
3016 case TargetOpcode::G_SMAX:
3017 case TargetOpcode::G_ABDS:
3025 case TargetOpcode::G_SDIVREM:
3035 case TargetOpcode::G_ASHR:
3036 case TargetOpcode::G_LSHR:
3040 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3054 case TargetOpcode::G_UDIV:
3055 case TargetOpcode::G_UREM:
3056 case TargetOpcode::G_ABDU:
3063 case TargetOpcode::G_UDIVREM:
3072 case TargetOpcode::G_UMIN:
3073 case TargetOpcode::G_UMAX: {
3074 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3076 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3080 ? TargetOpcode::G_SEXT
3081 : TargetOpcode::G_ZEXT;
3091 case TargetOpcode::G_SELECT:
3101 bool IsVec = MRI.getType(
MI.getOperand(1).getReg()).isVector();
3108 case TargetOpcode::G_FPEXT:
3116 case TargetOpcode::G_FPTOSI:
3117 case TargetOpcode::G_FPTOUI:
3118 case TargetOpcode::G_INTRINSIC_LRINT:
3119 case TargetOpcode::G_INTRINSIC_LLRINT:
3120 case TargetOpcode::G_IS_FPCLASS:
3130 case TargetOpcode::G_SITOFP:
3140 case TargetOpcode::G_UITOFP:
3150 case TargetOpcode::G_FPTOSI_SAT:
3151 case TargetOpcode::G_FPTOUI_SAT:
3156 LLT Ty = MRI.getType(OldDst);
3157 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3159 MI.getOperand(0).setReg(ExtReg);
3160 uint64_t ShortBits = Ty.getScalarSizeInBits();
3163 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3174 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3175 NewDst =
MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3183 NewDst =
MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3191 case TargetOpcode::G_LOAD:
3192 case TargetOpcode::G_SEXTLOAD:
3193 case TargetOpcode::G_ZEXTLOAD:
3194 case TargetOpcode::G_FPEXTLOAD:
3200 case TargetOpcode::G_STORE: {
3204 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3205 assert(!Ty.isPointerOrPointerVector() &&
"Can't widen type");
3206 if (!Ty.isScalar()) {
3214 MI.setMemRefs(MF, {NewMMO});
3221 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3222 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3228 case TargetOpcode::G_FPTRUNCSTORE:
3235 case TargetOpcode::G_CONSTANT: {
3238 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3239 MRI.getType(
MI.getOperand(0).getReg()));
3240 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3241 ExtOpc == TargetOpcode::G_ANYEXT) &&
3244 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3248 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3254 case TargetOpcode::G_FCONSTANT: {
3260 auto IntCst =
MIRBuilder.buildConstant(
MI.getOperand(0).getReg(), Val);
3262 MI.eraseFromParent();
3265 case TargetOpcode::G_IMPLICIT_DEF: {
3271 case TargetOpcode::G_BRCOND:
3277 case TargetOpcode::G_FCMP:
3288 case TargetOpcode::G_ICMP:
3293 LLT SrcTy = MRI.getType(
MI.getOperand(2).getReg());
3297 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3298 unsigned ExtOpcode =
3302 ? TargetOpcode::G_SEXT
3303 : TargetOpcode::G_ZEXT;
3310 case TargetOpcode::G_PTR_ADD:
3311 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3317 case TargetOpcode::G_PHI: {
3318 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3321 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3333 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3336 LLT VecTy = MRI.getType(VecReg);
3340 TargetOpcode::G_ANYEXT);
3354 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3370 LLT VecTy = MRI.getType(VecReg);
3389 case TargetOpcode::G_FADD:
3390 case TargetOpcode::G_FMUL:
3391 case TargetOpcode::G_FSUB:
3392 case TargetOpcode::G_FMA:
3393 case TargetOpcode::G_FMAD:
3394 case TargetOpcode::G_FNEG:
3395 case TargetOpcode::G_FABS:
3396 case TargetOpcode::G_FCANONICALIZE:
3397 case TargetOpcode::G_FMINNUM:
3398 case TargetOpcode::G_FMAXNUM:
3399 case TargetOpcode::G_FMINNUM_IEEE:
3400 case TargetOpcode::G_FMAXNUM_IEEE:
3401 case TargetOpcode::G_FMINIMUM:
3402 case TargetOpcode::G_FMAXIMUM:
3403 case TargetOpcode::G_FMINIMUMNUM:
3404 case TargetOpcode::G_FMAXIMUMNUM:
3405 case TargetOpcode::G_FDIV:
3406 case TargetOpcode::G_FREM:
3407 case TargetOpcode::G_FCEIL:
3408 case TargetOpcode::G_FFLOOR:
3409 case TargetOpcode::G_FCOS:
3410 case TargetOpcode::G_FSIN:
3411 case TargetOpcode::G_FTAN:
3412 case TargetOpcode::G_FACOS:
3413 case TargetOpcode::G_FASIN:
3414 case TargetOpcode::G_FATAN:
3415 case TargetOpcode::G_FATAN2:
3416 case TargetOpcode::G_FCOSH:
3417 case TargetOpcode::G_FSINH:
3418 case TargetOpcode::G_FTANH:
3419 case TargetOpcode::G_FLOG10:
3420 case TargetOpcode::G_FLOG:
3421 case TargetOpcode::G_FLOG2:
3422 case TargetOpcode::G_FRINT:
3423 case TargetOpcode::G_FNEARBYINT:
3424 case TargetOpcode::G_FSQRT:
3425 case TargetOpcode::G_FEXP:
3426 case TargetOpcode::G_FEXP2:
3427 case TargetOpcode::G_FEXP10:
3428 case TargetOpcode::G_FPOW:
3429 case TargetOpcode::G_INTRINSIC_TRUNC:
3430 case TargetOpcode::G_INTRINSIC_ROUND:
3431 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3435 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3441 case TargetOpcode::G_FMODF: {
3451 case TargetOpcode::G_FPOWI:
3452 case TargetOpcode::G_FLDEXP:
3453 case TargetOpcode::G_STRICT_FLDEXP: {
3455 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3476 case TargetOpcode::G_FFREXP: {
3489 case TargetOpcode::G_LROUND:
3490 case TargetOpcode::G_LLROUND:
3501 case TargetOpcode::G_INTTOPTR:
3509 case TargetOpcode::G_PTRTOINT:
3517 case TargetOpcode::G_BUILD_VECTOR: {
3521 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3527 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3535 case TargetOpcode::G_SEXT_INREG:
3544 case TargetOpcode::G_PTRMASK: {
3552 case TargetOpcode::G_VECREDUCE_ADD: {
3561 case TargetOpcode::G_VECREDUCE_FADD:
3562 case TargetOpcode::G_VECREDUCE_FMUL:
3563 case TargetOpcode::G_VECREDUCE_FMIN:
3564 case TargetOpcode::G_VECREDUCE_FMAX:
3565 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3566 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3571 LLT VecTy = MRI.getType(VecReg);
3578 case TargetOpcode::G_VSCALE: {
3585 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3590 case TargetOpcode::G_SPLAT_VECTOR: {
3599 case TargetOpcode::G_INSERT_SUBVECTOR: {
3607 LLT SubVecTy = MRI.getType(SubVec);
3611 auto BigZExt =
MIRBuilder.buildZExt(WideTy, BigVec);
3612 auto SubZExt =
MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3613 auto WideInsert =
MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3617 auto SplatZero =
MIRBuilder.buildSplatVector(
3622 MI.eraseFromParent();
3631 auto Unmerge =
B.buildUnmerge(Ty, Src);
3632 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
3641 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3655 MIRBuilder.
buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3664 MI.eraseFromParent();
3675 MI.eraseFromParent();
3682 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3683 if (SrcTy.isVector()) {
3687 if (DstTy.isVector()) {
3688 int NumDstElt = DstTy.getNumElements();
3689 int NumSrcElt = SrcTy.getNumElements();
3692 LLT DstCastTy = DstEltTy;
3693 LLT SrcPartTy = SrcEltTy;
3697 if (NumSrcElt < NumDstElt) {
3708 SrcPartTy = SrcEltTy;
3709 }
else if (NumSrcElt > NumDstElt) {
3721 DstCastTy = DstEltTy;
3726 SrcReg =
MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3730 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3731 MI.eraseFromParent();
3735 if (DstTy.isVector()) {
3738 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3739 MI.eraseFromParent();
3755 unsigned NewEltSize,
3756 unsigned OldEltSize) {
3757 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3758 LLT IdxTy =
B.getMRI()->getType(Idx);
3761 auto OffsetMask =
B.buildConstant(
3763 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
3764 return B.buildShl(IdxTy, OffsetIdx,
3765 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3780 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] =
MI.getFirst3RegLLTs();
3784 unsigned OldNumElts = SrcVecTy.getNumElements();
3791 if (NewNumElts > OldNumElts) {
3802 if (NewNumElts % OldNumElts != 0)
3806 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3810 auto NewEltsPerOldEltK =
MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3813 auto NewBaseIdx =
MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3815 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3816 auto IdxOffset =
MIRBuilder.buildConstant(IdxTy,
I);
3817 auto TmpIdx =
MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3818 auto Elt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3819 NewOps[
I] = Elt.getReg(0);
3822 auto NewVec =
MIRBuilder.buildBuildVector(MidTy, NewOps);
3824 MI.eraseFromParent();
3828 if (NewNumElts < OldNumElts) {
3829 if (NewEltSize % OldEltSize != 0)
3851 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3852 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3855 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3859 WideElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3860 ScaledIdx).getReg(0);
3868 auto ExtractedBits =
MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3870 MI.eraseFromParent();
3884 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3885 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3886 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3887 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3890 auto EltMask =
B.buildConstant(
3894 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3895 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3898 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3902 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3916 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3917 MI.getFirst4RegLLTs();
3929 if (NewNumElts < OldNumElts) {
3930 if (NewEltSize % OldEltSize != 0)
3939 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3940 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3943 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3947 ExtractedElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3948 ScaledIdx).getReg(0);
3958 InsertedElt =
MIRBuilder.buildInsertVectorElement(
3959 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3963 MI.eraseFromParent();
3993 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3997 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3998 return UnableToLegalize;
4003 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
4005 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
4014 MI.eraseFromParent();
4032 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
4033 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4043 auto Inp1 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4044 auto Inp2 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4046 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4047 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4049 MI.eraseFromParent();
4079 LLT DstTy = MRI.getType(Dst);
4080 LLT SrcTy = MRI.getType(Src);
4086 if (DstTy == CastTy)
4094 if (CastEltSize < DstEltSize)
4097 auto AdjustAmt = CastEltSize / DstEltSize;
4098 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4099 SrcTyMinElts % AdjustAmt != 0)
4104 auto CastVec =
MIRBuilder.buildBitcast(SrcTy, Src);
4105 auto PromotedES =
MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4108 ES->eraseFromParent();
4143 LLT DstTy = MRI.getType(Dst);
4144 LLT BigVecTy = MRI.getType(BigVec);
4145 LLT SubVecTy = MRI.getType(SubVec);
4147 if (DstTy == CastTy)
4162 if (CastEltSize < DstEltSize)
4165 auto AdjustAmt = CastEltSize / DstEltSize;
4166 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4167 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4173 auto CastBigVec =
MIRBuilder.buildBitcast(BigVecTy, BigVec);
4174 auto CastSubVec =
MIRBuilder.buildBitcast(SubVecTy, SubVec);
4176 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4179 ES->eraseFromParent();
4187 LLT DstTy = MRI.getType(DstReg);
4197 if (MemSizeInBits != MemStoreSizeInBits) {
4214 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4218 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4219 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4221 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4224 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4226 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4229 if (DstTy != LoadTy)
4237 if (
MIRBuilder.getDataLayout().isBigEndian())
4255 uint64_t LargeSplitSize, SmallSplitSize;
4260 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4267 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4270 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4281 if (Alignment.
value() * 8 > MemSizeInBits &&
4286 auto NewLoad =
MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4303 LLT PtrTy = MRI.getType(PtrReg);
4316 auto LargeLoad =
MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4319 auto OffsetCst =
MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
4320 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4321 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4323 SmallPtr, *SmallMMO);
4325 auto ShiftAmt =
MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4326 auto Shift =
MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4328 if (AnyExtTy == DstTy)
4329 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4331 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4335 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4355 LLT SrcTy = MRI.getType(SrcReg);
4363 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4369 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4371 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4375 auto ZextInReg =
MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4379 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4394 uint64_t LargeSplitSize, SmallSplitSize;
4401 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4404 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4413 if (SrcTy.isPointer()) {
4418 auto ExtVal =
MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4421 auto ShiftAmt =
MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4422 auto SmallVal =
MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4425 LLT PtrTy = MRI.getType(PtrReg);
4427 LargeSplitSize / 8);
4428 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4434 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4435 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4444 LLT SrcTy = MRI.getType(SrcReg);
4450 assert(SrcTy.isVector() &&
"Expect a vector store type");
4457 auto CurrVal =
MIRBuilder.buildConstant(IntTy, 0);
4461 auto Elt =
MIRBuilder.buildExtractVectorElement(
4462 SrcTy.getElementType(), SrcReg,
MIRBuilder.buildConstant(IdxTy,
I));
4463 auto Trunc =
MIRBuilder.buildTrunc(MemScalarTy, Elt);
4464 auto ZExt =
MIRBuilder.buildZExt(IntTy, Trunc);
4470 auto Shifted =
MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4471 CurrVal =
MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4475 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4486 switch (
MI.getOpcode()) {
4487 case TargetOpcode::G_LOAD: {
4505 case TargetOpcode::G_STORE: {
4521 case TargetOpcode::G_SELECT: {
4525 if (MRI.getType(
MI.getOperand(1).getReg()).isVector()) {
4527 dbgs() <<
"bitcast action not implemented for vector select\n");
4538 case TargetOpcode::G_AND:
4539 case TargetOpcode::G_OR:
4540 case TargetOpcode::G_XOR: {
4548 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4550 case TargetOpcode::G_INSERT_VECTOR_ELT:
4552 case TargetOpcode::G_CONCAT_VECTORS:
4554 case TargetOpcode::G_SHUFFLE_VECTOR:
4556 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4558 case TargetOpcode::G_INSERT_SUBVECTOR:
4566void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4575 switch(
MI.getOpcode()) {
4578 case TargetOpcode::G_FCONSTANT:
4580 case TargetOpcode::G_BITCAST:
4582 case TargetOpcode::G_SREM:
4583 case TargetOpcode::G_UREM: {
4584 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4586 MIRBuilder.buildInstr(
MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4587 {MI.getOperand(1), MI.getOperand(2)});
4589 auto Prod =
MIRBuilder.buildMul(Ty, Quot,
MI.getOperand(2));
4591 MI.eraseFromParent();
4594 case TargetOpcode::G_SADDO:
4595 case TargetOpcode::G_SSUBO:
4597 case TargetOpcode::G_SADDE:
4599 case TargetOpcode::G_SSUBE:
4601 case TargetOpcode::G_UMULH:
4602 case TargetOpcode::G_SMULH:
4604 case TargetOpcode::G_SMULO:
4605 case TargetOpcode::G_UMULO: {
4608 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4609 LLT Ty = MRI.getType(Res);
4611 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4612 ? TargetOpcode::G_SMULH
4613 : TargetOpcode::G_UMULH;
4617 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4618 MI.removeOperand(1);
4621 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4629 if (Opcode == TargetOpcode::G_SMULH) {
4630 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4631 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4638 case TargetOpcode::G_FNEG: {
4639 auto [Res, ResTy, SubByReg, SubByRegTy] =
MI.getFirst2RegLLTs();
4642 Register CastedSubByReg = SubByReg;
4644 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4645 !SubByRegTy.getScalarType().isInteger()) {
4646 auto BitcastDst = SubByRegTy.changeElementType(
4648 CastedSubByReg =
MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4654 if (ResTy != TyInt) {
4656 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4659 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4661 MI.eraseFromParent();
4664 case TargetOpcode::G_FSUB:
4665 case TargetOpcode::G_STRICT_FSUB: {
4666 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4667 LLT Ty = MRI.getType(Res);
4672 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4673 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4677 MI.eraseFromParent();
4680 case TargetOpcode::G_FMAD:
4682 case TargetOpcode::G_FFLOOR:
4684 case TargetOpcode::G_LROUND:
4685 case TargetOpcode::G_LLROUND: {
4688 LLT SrcTy = MRI.getType(SrcReg);
4689 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4692 MI.eraseFromParent();
4695 case TargetOpcode::G_INTRINSIC_ROUND:
4697 case TargetOpcode::G_FRINT: {
4700 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4703 case TargetOpcode::G_INTRINSIC_LRINT:
4704 case TargetOpcode::G_INTRINSIC_LLRINT: {
4707 LLT SrcTy = MRI.getType(SrcReg);
4709 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4711 MI.eraseFromParent();
4714 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4715 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4716 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4717 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4718 **
MI.memoperands_begin());
4720 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4721 MI.eraseFromParent();
4724 case TargetOpcode::G_LOAD:
4725 case TargetOpcode::G_SEXTLOAD:
4726 case TargetOpcode::G_ZEXTLOAD:
4728 case TargetOpcode::G_STORE:
4730 case TargetOpcode::G_CTLZ_ZERO_POISON:
4731 case TargetOpcode::G_CTTZ_ZERO_POISON:
4732 case TargetOpcode::G_CTLZ:
4733 case TargetOpcode::G_CTTZ:
4734 case TargetOpcode::G_CTPOP:
4735 case TargetOpcode::G_CTLS:
4738 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4740 Register NewRes = MRI.cloneVirtualRegister(Res);
4747 MI.eraseFromParent();
4751 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4752 const LLT CondTy = MRI.getType(CarryOut);
4753 const LLT Ty = MRI.getType(Res);
4755 Register NewRes = MRI.cloneVirtualRegister(Res);
4758 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4764 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4765 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4772 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4777 MI.eraseFromParent();
4781 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4786 MI.eraseFromParent();
4790 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4791 const LLT CondTy = MRI.getType(BorrowOut);
4792 const LLT Ty = MRI.getType(Res);
4795 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4801 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4802 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4809 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4810 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4812 MI.eraseFromParent();
4852 case G_MERGE_VALUES:
4854 case G_UNMERGE_VALUES:
4856 case TargetOpcode::G_SEXT_INREG: {
4857 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4858 int64_t SizeInBits =
MI.getOperand(2).getImm();
4860 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4861 LLT DstTy = MRI.getType(DstReg);
4862 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4865 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4866 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4867 MI.eraseFromParent();
4870 case G_EXTRACT_VECTOR_ELT:
4871 case G_INSERT_VECTOR_ELT:
4873 case G_SHUFFLE_VECTOR:
4875 case G_VECTOR_COMPRESS:
4877 case G_DYN_STACKALLOC:
4879 case G_INSERT_SUBVECTOR: {
4880 if (MRI.getType(
MI.getOperand(1).getReg()).isScalable() ||
4881 MRI.getType(
MI.getOperand(2).getReg()).isScalable())
4886 Register Subvector =
MI.getOperand(2).getReg();
4887 auto InsertionPointImm =
MI.getOperand(3).getImm();
4890 LLT SubvectorTy = MRI.getType(Subvector);
4894 bool InsertInLowHalf = InsertionPointImm == 0;
4895 auto Extract =
MIRBuilder.buildExtractSubvector(
4899 auto LowHalf = InsertInLowHalf ? Subvector : Extract.getReg(0);
4900 auto HighHalf = InsertInLowHalf ? Extract.getReg(0) : Subvector;
4902 MIRBuilder.buildInstr(TargetOpcode::G_CONCAT_VECTORS, {
MI.getOperand(0)},
4903 {LowHalf, HighHalf});
4904 MI.eraseFromParent();
4910 Register ExtendedSubvector = MRI.createGenericVirtualRegister(VectorTy);
4911 MIRBuilder.buildPadVectorWithUndefElements(ExtendedSubvector, Subvector);
4917 if (i >= InsertionPointImm &&
4919 Mask.push_back(VectorTy.
getNumElements() + i - InsertionPointImm);
4927 MI.eraseFromParent();
4933 case G_STACKRESTORE:
4943 case G_READ_REGISTER:
4944 case G_WRITE_REGISTER:
4951 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4952 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4958 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4963 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4974 bool IsSigned =
MI.getOpcode() == G_ABDS;
4975 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4976 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4977 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
5000 case G_MEMCPY_INLINE:
5001 case G_MEMSET_INLINE:
5013 case G_ATOMICRMW_SUB: {
5014 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
5015 const LLT ValTy = MRI.getType(Val);
5019 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
5020 MI.eraseFromParent();
5046 unsigned AddrSpace =
DL.getAllocaAddrSpace();
5050 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
5056 Align StackTypeAlign =
5063 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
5064 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
5069 LLT IdxTy =
B.getMRI()->getType(IdxReg);
5081 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
5084 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
5095 "Converting bits to bytes lost precision");
5101 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5102 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
5104 if (IdxTy != MRI.getType(Index))
5105 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5110 LLT PtrTy = MRI.getType(VecPtr);
5111 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
5119 std::initializer_list<unsigned> NonVecOpIndices) {
5120 if (
MI.getNumMemOperands() != 0)
5137 if (!Ty.isVector()) {
5143 if (Ty.getNumElements() != NumElts)
5158 assert(Ty.isVector() &&
"Expected vector type");
5160 int NumParts, NumLeftover;
5161 std::tie(NumParts, NumLeftover) =
5164 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5165 for (
int i = 0; i < NumParts; ++i) {
5170 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5179 for (
unsigned i = 0; i <
N; ++i) {
5181 Ops.push_back(
Op.getReg());
5182 else if (
Op.isImm())
5183 Ops.push_back(
Op.getImm());
5184 else if (
Op.isPredicate())
5206 std::initializer_list<unsigned> NonVecOpIndices) {
5208 "Non-compatible opcode or not specified non-vector operands");
5209 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5211 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5212 unsigned NumDefs =
MI.getNumDefs();
5220 for (
unsigned i = 0; i < NumDefs; ++i) {
5221 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5229 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5230 ++UseIdx, ++UseNo) {
5233 MI.getOperand(UseIdx));
5242 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5246 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5248 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5249 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5252 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5253 Uses.push_back(InputOpsPieces[InputNo][i]);
5256 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5257 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5262 for (
unsigned i = 0; i < NumDefs; ++i)
5263 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5265 for (
unsigned i = 0; i < NumDefs; ++i)
5266 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5269 MI.eraseFromParent();
5276 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5278 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5279 unsigned NumDefs =
MI.getNumDefs();
5283 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5288 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5289 UseIdx += 2, ++UseNo) {
5297 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5299 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5300 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5302 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5305 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5306 Phi.addUse(InputOpsPieces[j][i]);
5307 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5317 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5319 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5322 MI.eraseFromParent();
5330 const int NumDst =
MI.getNumOperands() - 1;
5331 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5332 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5333 LLT SrcTy = MRI.getType(SrcReg);
5335 if (TypeIdx != 1 || NarrowTy == DstTy)
5342 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5345 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5359 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5360 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5361 const int PartsPerUnmerge = NumDst / NumUnmerge;
5363 for (
int I = 0;
I != NumUnmerge; ++
I) {
5364 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5366 for (
int J = 0; J != PartsPerUnmerge; ++J)
5367 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5368 MIB.addUse(Unmerge.getReg(
I));
5371 MI.eraseFromParent();
5378 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5382 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5384 if (NarrowTy == SrcTy)
5392 assert(SrcTy.isVector() &&
"Expected vector types");
5394 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5408 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5409 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5410 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5416 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5417 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5418 ++i,
Offset += NumNarrowTyElts) {
5421 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5424 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5425 MI.eraseFromParent();
5429 assert(TypeIdx == 0 &&
"Bad type index");
5430 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5445 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5446 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5448 for (
unsigned i = 0; i < NumParts; ++i) {
5450 for (
unsigned j = 0; j < NumElts; ++j)
5451 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5453 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5456 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5457 MI.eraseFromParent();
5465 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5467 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5469 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5471 InsertVal =
MI.getOperand(2).getReg();
5473 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5474 LLT VecTy = MRI.getType(SrcVec);
5480 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5484 MI.eraseFromParent();
5493 SplitPieces[IdxVal] = InsertVal;
5494 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5496 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5500 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5503 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5504 TargetOpcode::G_ANYEXT);
5508 LLT IdxTy = MRI.getType(Idx);
5509 int64_t PartIdx = IdxVal / NewNumElts;
5511 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5514 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5517 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5518 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5519 VecParts[PartIdx] = InsertPart.getReg(0);
5523 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5525 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5529 MI.eraseFromParent();
5549 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5561 LLT ValTy = MRI.getType(ValReg);
5570 int NumLeftover = -1;
5576 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5578 NumParts = NarrowRegs.
size();
5579 NumLeftover = NarrowLeftoverRegs.
size();
5586 LLT PtrTy = MRI.getType(AddrReg);
5596 auto MMO = LdStMI.
getMMO();
5598 unsigned NumParts,
unsigned Offset) ->
unsigned {
5601 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5603 unsigned ByteOffset =
Offset / 8;
5606 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5613 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5614 ValRegs.push_back(Dst);
5615 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5617 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5626 unsigned HandledOffset =
5627 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5631 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5634 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5635 LeftoverTy, NarrowLeftoverRegs);
5649 switch (
MI.getOpcode()) {
5650 case G_IMPLICIT_DEF:
5666 case G_FCANONICALIZE:
5683 case G_INTRINSIC_LRINT:
5684 case G_INTRINSIC_LLRINT:
5685 case G_INTRINSIC_ROUND:
5686 case G_INTRINSIC_ROUNDEVEN:
5689 case G_INTRINSIC_TRUNC:
5717 case G_FMINNUM_IEEE:
5718 case G_FMAXNUM_IEEE:
5740 case G_CTLZ_ZERO_POISON:
5742 case G_CTTZ_ZERO_POISON:
5759 case G_ADDRSPACE_CAST:
5772 case G_STRICT_FLDEXP:
5774 case G_TRUNC_SSAT_S:
5775 case G_TRUNC_SSAT_U:
5776 case G_TRUNC_USAT_U:
5784 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5789 case G_UNMERGE_VALUES:
5791 case G_BUILD_VECTOR:
5792 assert(TypeIdx == 0 &&
"not a vector type index");
5794 case G_CONCAT_VECTORS:
5798 case G_EXTRACT_VECTOR_ELT:
5799 case G_INSERT_VECTOR_ELT:
5808 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5809 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5811 case G_SHUFFLE_VECTOR:
5817 case G_INTRINSIC_FPTRUNC_ROUND:
5827 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5828 "Not a bitcast operation");
5833 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5835 unsigned NewElemCount =
5838 if (NewElemCount == 1) {
5841 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5848 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5857 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5858 MI.eraseFromParent();
5864 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5868 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5869 MI.getFirst3RegLLTs();
5872 if (DstTy != Src1Ty)
5874 if (DstTy != Src2Ty)
5889 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5905 unsigned InputUsed[2] = {-1U, -1U};
5906 unsigned FirstMaskIdx =
High * NewElts;
5907 bool UseBuildVector =
false;
5908 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5910 int Idx = Mask[FirstMaskIdx + MaskOffset];
5915 if (
Input >= std::size(Inputs)) {
5922 Idx -=
Input * NewElts;
5926 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5927 if (InputUsed[OpNo] ==
Input) {
5930 }
else if (InputUsed[OpNo] == -1U) {
5932 InputUsed[OpNo] =
Input;
5937 if (OpNo >= std::size(InputUsed)) {
5940 UseBuildVector =
true;
5945 Ops.push_back(Idx + OpNo * NewElts);
5948 if (UseBuildVector) {
5953 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5955 int Idx = Mask[FirstMaskIdx + MaskOffset];
5960 if (
Input >= std::size(Inputs)) {
5967 Idx -=
Input * NewElts;
5971 .buildExtractVectorElement(
5972 EltTy, Inputs[
Input],
5978 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5979 }
else if (InputUsed[0] == -1U) {
5981 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5982 }
else if (NewElts == 1) {
5983 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5985 Register Op0 = Inputs[InputUsed[0]];
5989 : Inputs[InputUsed[1]];
5991 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5998 MI.eraseFromParent();
6011 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
6017 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
6020 const unsigned NumParts =
6022 : SrcTy.getNumElements();
6026 if (DstTy != NarrowTy)
6032 unsigned NumPartsLeft = NumParts;
6033 while (NumPartsLeft > 1) {
6034 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
6037 .buildInstr(ScalarOpc, {NarrowTy},
6038 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
6041 SplitSrcs = PartialResults;
6042 PartialResults.
clear();
6043 NumPartsLeft = SplitSrcs.
size();
6047 MI.eraseFromParent();
6052 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
6053 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
6056 MI.eraseFromParent();
6060 for (
unsigned Part = 0; Part < NumParts; ++Part) {
6062 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
6070 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
6073 Register Acc = PartialReductions[0];
6074 for (
unsigned Part = 1; Part < NumParts; ++Part) {
6075 if (Part == NumParts - 1) {
6077 {Acc, PartialReductions[Part]});
6080 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
6084 MI.eraseFromParent();
6090 unsigned int TypeIdx,
6092 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6093 MI.getFirst3RegLLTs();
6094 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6098 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6099 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6100 "Unexpected vecreduce opcode");
6101 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6102 ? TargetOpcode::G_FADD
6103 : TargetOpcode::G_FMUL;
6106 unsigned NumParts = SrcTy.getNumElements();
6109 for (
unsigned i = 0; i < NumParts; i++)
6110 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6114 MI.eraseFromParent();
6121 unsigned ScalarOpc) {
6129 while (SplitSrcs.
size() > 1) {
6131 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6139 SplitSrcs = std::move(PartialRdxs);
6143 MI.getOperand(1).setReg(SplitSrcs[0]);
6150 const LLT HalfTy,
const LLT AmtTy) {
6152 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6153 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6157 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6158 MI.eraseFromParent();
6164 unsigned VTBits = 2 * NVTBits;
6167 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6168 if (Amt.
ugt(VTBits)) {
6170 }
else if (Amt.
ugt(NVTBits)) {
6173 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6174 }
else if (Amt == NVTBits) {
6182 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6185 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6186 if (Amt.
ugt(VTBits)) {
6188 }
else if (Amt.
ugt(NVTBits)) {
6190 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6192 }
else if (Amt == NVTBits) {
6196 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6198 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6200 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6206 if (Amt.
ugt(VTBits)) {
6208 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6209 }
else if (Amt.
ugt(NVTBits)) {
6211 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6213 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6214 }
else if (Amt == NVTBits) {
6217 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6219 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6221 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6223 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6230 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6231 MI.eraseFromParent();
6247 LLT DstTy = MRI.getType(DstReg);
6252 LLT ShiftAmtTy = MRI.getType(Amt);
6254 if (DstEltSize % 2 != 0)
6270 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6281 const unsigned NewBitSize = DstEltSize / 2;
6293 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6295 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6296 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6299 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6300 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6302 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6307 switch (
MI.getOpcode()) {
6308 case TargetOpcode::G_SHL: {
6310 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6312 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6313 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6314 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6317 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6318 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6320 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6322 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6324 ResultRegs[0] =
Lo.getReg(0);
6325 ResultRegs[1] =
Hi.getReg(0);
6328 case TargetOpcode::G_LSHR:
6329 case TargetOpcode::G_ASHR: {
6331 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6333 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6334 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6335 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6339 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6342 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6343 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6345 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6349 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6351 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6353 ResultRegs[0] =
Lo.getReg(0);
6354 ResultRegs[1] =
Hi.getReg(0);
6361 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6362 MI.eraseFromParent();
6371 LLT TargetTy,
LLT ShiftAmtTy) {
6374 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6376 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6377 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6378 const bool NeedsInterWordShift = ShiftBits != 0;
6381 case TargetOpcode::G_SHL: {
6384 if (PartIdx < ShiftWords)
6387 unsigned SrcIdx = PartIdx - ShiftWords;
6388 if (!NeedsInterWordShift)
6389 return SrcParts[SrcIdx];
6394 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6398 return Hi.getReg(0);
6401 case TargetOpcode::G_LSHR: {
6402 unsigned SrcIdx = PartIdx + ShiftWords;
6403 if (SrcIdx >= NumParts)
6405 if (!NeedsInterWordShift)
6406 return SrcParts[SrcIdx];
6410 if (SrcIdx + 1 < NumParts) {
6411 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6415 return Lo.getReg(0);
6418 case TargetOpcode::G_ASHR: {
6420 unsigned SrcIdx = PartIdx + ShiftWords;
6421 if (SrcIdx >= NumParts)
6423 if (!NeedsInterWordShift)
6424 return SrcParts[SrcIdx];
6429 (SrcIdx == NumParts - 1)
6433 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6455 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6456 ?
static_cast<unsigned>(TargetOpcode::G_LSHR)
6461 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6470 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6471 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6473 auto IsZeroBitShift =
6481 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6482 : TargetOpcode::G_SHL;
6485 auto TargetBitsConst =
6487 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6492 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6497 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6499 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6503 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6516 LLT DstTy = MRI.getType(DstReg);
6520 const unsigned NumParts = DstBits / TargetBits;
6522 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6532 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6533 MI.eraseFromParent();
6538 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6539 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6545 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6549 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6552 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6553 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6557 for (
unsigned I = 0;
I < NumParts; ++
I)
6559 Params, TargetTy, ShiftAmtTy);
6561 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6562 MI.eraseFromParent();
6571 LLT DstTy = MRI.getType(DstReg);
6572 LLT ShiftAmtTy = MRI.getType(AmtReg);
6576 const unsigned NumParts = DstBits / TargetBits;
6578 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6595 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6607 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6608 auto TargetBitsLog2Const =
6609 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6610 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6613 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6615 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6623 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6624 auto TargetBitsMinusOneConst =
6625 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6627 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6628 TargetBitsMinusOneConst)
6631 FillValue = ZeroReg;
6639 for (
unsigned I = 0;
I < NumParts; ++
I) {
6641 Register InBoundsResult = FillValue;
6651 for (
unsigned K = 0; K < NumParts; ++K) {
6652 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6654 WordShift, WordShiftKConst);
6666 switch (
MI.getOpcode()) {
6667 case TargetOpcode::G_SHL:
6668 MainSrcIdx = (int)
I - (
int)K;
6669 CarrySrcIdx = MainSrcIdx - 1;
6671 case TargetOpcode::G_LSHR:
6672 case TargetOpcode::G_ASHR:
6673 MainSrcIdx = (int)
I + (
int)K;
6674 CarrySrcIdx = MainSrcIdx + 1;
6682 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6683 Register MainOp = SrcParts[MainSrcIdx];
6687 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6688 CarryOp = SrcParts[CarrySrcIdx];
6689 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6690 CarrySrcIdx >= (
int)NumParts)
6691 CarryOp = FillValue;
6697 ResultForK = FillValue;
6703 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6710 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6714 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6715 MI.eraseFromParent();
6722 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6725 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6740 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6745 "getNeutralElementForVecReduce called with invalid opcode!");
6746 case TargetOpcode::G_VECREDUCE_ADD:
6747 case TargetOpcode::G_VECREDUCE_OR:
6748 case TargetOpcode::G_VECREDUCE_XOR:
6749 case TargetOpcode::G_VECREDUCE_UMAX:
6751 case TargetOpcode::G_VECREDUCE_MUL:
6753 case TargetOpcode::G_VECREDUCE_AND:
6754 case TargetOpcode::G_VECREDUCE_UMIN:
6757 case TargetOpcode::G_VECREDUCE_SMAX:
6760 case TargetOpcode::G_VECREDUCE_SMIN:
6763 case TargetOpcode::G_VECREDUCE_FADD:
6765 case TargetOpcode::G_VECREDUCE_FMUL:
6767 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6768 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6769 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6770 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6778 unsigned Opc =
MI.getOpcode();
6780 case TargetOpcode::G_IMPLICIT_DEF:
6781 case TargetOpcode::G_LOAD: {
6789 case TargetOpcode::G_STORE:
6796 case TargetOpcode::G_AND:
6797 case TargetOpcode::G_OR:
6798 case TargetOpcode::G_XOR:
6799 case TargetOpcode::G_ADD:
6800 case TargetOpcode::G_SUB:
6801 case TargetOpcode::G_MUL:
6802 case TargetOpcode::G_FADD:
6803 case TargetOpcode::G_FSUB:
6804 case TargetOpcode::G_FMUL:
6805 case TargetOpcode::G_FDIV:
6806 case TargetOpcode::G_FCOPYSIGN:
6807 case TargetOpcode::G_UADDSAT:
6808 case TargetOpcode::G_USUBSAT:
6809 case TargetOpcode::G_SADDSAT:
6810 case TargetOpcode::G_SSUBSAT:
6811 case TargetOpcode::G_SMIN:
6812 case TargetOpcode::G_SMAX:
6813 case TargetOpcode::G_UMIN:
6814 case TargetOpcode::G_UMAX:
6815 case TargetOpcode::G_FMINNUM:
6816 case TargetOpcode::G_FMAXNUM:
6817 case TargetOpcode::G_FMINNUM_IEEE:
6818 case TargetOpcode::G_FMAXNUM_IEEE:
6819 case TargetOpcode::G_FMINIMUM:
6820 case TargetOpcode::G_FMAXIMUM:
6821 case TargetOpcode::G_FMINIMUMNUM:
6822 case TargetOpcode::G_FMAXIMUMNUM:
6823 case TargetOpcode::G_STRICT_FADD:
6824 case TargetOpcode::G_STRICT_FSUB:
6825 case TargetOpcode::G_STRICT_FMUL: {
6833 case TargetOpcode::G_SHL:
6834 case TargetOpcode::G_ASHR:
6835 case TargetOpcode::G_LSHR: {
6841 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6847 case TargetOpcode::G_FMA:
6848 case TargetOpcode::G_STRICT_FMA:
6849 case TargetOpcode::G_FSHR:
6850 case TargetOpcode::G_FSHL: {
6859 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6860 case TargetOpcode::G_EXTRACT:
6867 case TargetOpcode::G_INSERT:
6868 case TargetOpcode::G_INSERT_VECTOR_ELT:
6869 case TargetOpcode::G_FREEZE:
6870 case TargetOpcode::G_FNEG:
6871 case TargetOpcode::G_FABS:
6872 case TargetOpcode::G_FSQRT:
6873 case TargetOpcode::G_FCEIL:
6874 case TargetOpcode::G_FFLOOR:
6875 case TargetOpcode::G_FNEARBYINT:
6876 case TargetOpcode::G_FRINT:
6877 case TargetOpcode::G_INTRINSIC_ROUND:
6878 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6879 case TargetOpcode::G_INTRINSIC_TRUNC:
6880 case TargetOpcode::G_BITREVERSE:
6881 case TargetOpcode::G_BSWAP:
6882 case TargetOpcode::G_FCANONICALIZE:
6883 case TargetOpcode::G_SEXT_INREG:
6884 case TargetOpcode::G_ABS:
6885 case TargetOpcode::G_CTLZ:
6886 case TargetOpcode::G_CTPOP:
6894 case TargetOpcode::G_SELECT: {
6895 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6897 if (!CondTy.isScalar() ||
6903 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6905 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6910 if (CondTy.isVector())
6920 case TargetOpcode::G_UNMERGE_VALUES:
6922 case TargetOpcode::G_PHI:
6924 case TargetOpcode::G_SHUFFLE_VECTOR:
6926 case TargetOpcode::G_BUILD_VECTOR: {
6928 for (
auto Op :
MI.uses()) {
6936 MIRBuilder.buildDeleteTrailingVectorElements(
6937 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6938 MI.eraseFromParent();
6941 case TargetOpcode::G_SEXT:
6942 case TargetOpcode::G_ZEXT:
6943 case TargetOpcode::G_ANYEXT:
6944 case TargetOpcode::G_TRUNC:
6945 case TargetOpcode::G_FPTRUNC:
6946 case TargetOpcode::G_FPEXT:
6947 case TargetOpcode::G_FPTOSI:
6948 case TargetOpcode::G_FPTOUI:
6949 case TargetOpcode::G_FPTOSI_SAT:
6950 case TargetOpcode::G_FPTOUI_SAT:
6951 case TargetOpcode::G_SITOFP:
6952 case TargetOpcode::G_UITOFP: {
6959 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6962 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6970 case TargetOpcode::G_ICMP:
6971 case TargetOpcode::G_FCMP: {
6979 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6984 case TargetOpcode::G_BITCAST: {
6988 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6989 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
7005 case TargetOpcode::G_VECREDUCE_FADD:
7006 case TargetOpcode::G_VECREDUCE_FMUL:
7007 case TargetOpcode::G_VECREDUCE_ADD:
7008 case TargetOpcode::G_VECREDUCE_MUL:
7009 case TargetOpcode::G_VECREDUCE_AND:
7010 case TargetOpcode::G_VECREDUCE_OR:
7011 case TargetOpcode::G_VECREDUCE_XOR:
7012 case TargetOpcode::G_VECREDUCE_SMAX:
7013 case TargetOpcode::G_VECREDUCE_SMIN:
7014 case TargetOpcode::G_VECREDUCE_UMAX:
7015 case TargetOpcode::G_VECREDUCE_UMIN: {
7016 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
7018 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
7019 auto NeutralElement = getNeutralElementForVecReduce(
7025 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
7026 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
7027 NeutralElement, Idx);
7031 MO.
setReg(NewVec.getReg(0));
7043 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7045 unsigned MaskNumElts = Mask.size();
7046 unsigned SrcNumElts = SrcTy.getNumElements();
7049 if (MaskNumElts == SrcNumElts)
7052 if (MaskNumElts < SrcNumElts) {
7060 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7061 MI.getOperand(1).getReg(),
7062 MI.getOperand(2).getReg(), NewMask);
7063 MI.eraseFromParent();
7068 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
7069 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
7078 MOps1[0] =
MI.getOperand(1).getReg();
7079 MOps2[0] =
MI.getOperand(2).getReg();
7081 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
7082 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
7086 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
7088 if (Idx >=
static_cast<int>(SrcNumElts))
7089 Idx += PaddedMaskNumElts - SrcNumElts;
7094 if (MaskNumElts != PaddedMaskNumElts) {
7096 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7099 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
7101 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
7106 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7109 MI.eraseFromParent();
7115 unsigned int TypeIdx,
LLT MoreTy) {
7116 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
7118 unsigned NumElts = DstTy.getNumElements();
7121 if (DstTy.isVector() && Src1Ty.isVector() &&
7122 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7130 if (DstTy != Src1Ty || DstTy != Src2Ty)
7138 for (
unsigned I = 0;
I != NumElts; ++
I) {
7140 if (Idx <
static_cast<int>(NumElts))
7143 NewMask[
I] = Idx - NumElts + WidenNumElts;
7147 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7148 MI.getOperand(1).getReg(),
7149 MI.getOperand(2).getReg(), NewMask);
7150 MI.eraseFromParent();
7159 unsigned SrcParts = Src1Regs.
size();
7160 unsigned DstParts = DstRegs.
size();
7162 unsigned DstIdx = 0;
7164 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7165 DstRegs[DstIdx] = FactorSum;
7170 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7172 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7173 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7175 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7181 unsigned LowStart = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7182 unsigned LowEnd = std::min(DstIdx, SrcParts - 1);
7183 for (
unsigned RevI = LowEnd + 1; RevI != LowStart; --RevI) {
7184 unsigned i = RevI - 1;
7186 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7196 if (DstIdx != DstParts - 1) {
7197 MachineInstrBuilder Uaddo =
7198 B.buildUAddo(NarrowTy,
LLT::integer(1), Factors[0], Factors[1]);
7199 FactorSum = Uaddo.
getReg(0);
7200 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7201 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7202 MachineInstrBuilder Uaddo =
7203 B.buildUAddo(NarrowTy,
LLT::integer(1), FactorSum, Factors[i]);
7204 FactorSum = Uaddo.
getReg(0);
7205 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7206 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7210 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7211 for (
unsigned i = 2; i < Factors.
size(); ++i)
7212 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7215 CarrySumPrevDstIdx = CarrySum;
7216 DstRegs[DstIdx] = FactorSum;
7228 LLT DstType = MRI.getType(DstReg);
7230 if (DstType.isVector())
7233 unsigned Opcode =
MI.getOpcode();
7234 unsigned OpO, OpE, OpF;
7236 case TargetOpcode::G_SADDO:
7237 case TargetOpcode::G_SADDE:
7238 case TargetOpcode::G_UADDO:
7239 case TargetOpcode::G_UADDE:
7240 case TargetOpcode::G_ADD:
7241 OpO = TargetOpcode::G_UADDO;
7242 OpE = TargetOpcode::G_UADDE;
7243 OpF = TargetOpcode::G_UADDE;
7244 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7245 OpF = TargetOpcode::G_SADDE;
7247 case TargetOpcode::G_SSUBO:
7248 case TargetOpcode::G_SSUBE:
7249 case TargetOpcode::G_USUBO:
7250 case TargetOpcode::G_USUBE:
7251 case TargetOpcode::G_SUB:
7252 OpO = TargetOpcode::G_USUBO;
7253 OpE = TargetOpcode::G_USUBE;
7254 OpF = TargetOpcode::G_USUBE;
7255 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7256 OpF = TargetOpcode::G_SSUBE;
7263 unsigned NumDefs =
MI.getNumExplicitDefs();
7264 Register Src1 =
MI.getOperand(NumDefs).getReg();
7265 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7268 CarryDst =
MI.getOperand(1).getReg();
7269 if (
MI.getNumOperands() == NumDefs + 3)
7270 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7272 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7273 LLT LeftoverTy, DummyTy;
7275 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7280 int NarrowParts = Src1Regs.
size();
7281 Src1Regs.
append(Src1Left);
7282 Src2Regs.
append(Src2Left);
7285 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7287 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7290 if (i == e - 1 && CarryDst)
7291 CarryOut = CarryDst;
7293 CarryOut = MRI.createGenericVirtualRegister(
LLT::integer(1));
7296 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7297 {Src1Regs[i], Src2Regs[i]});
7298 }
else if (i == e - 1) {
7299 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7300 {Src1Regs[i], Src2Regs[i], CarryIn});
7302 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7303 {Src1Regs[i], Src2Regs[i], CarryIn});
7309 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7310 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7311 ArrayRef(DstRegs).drop_front(NarrowParts));
7313 MI.eraseFromParent();
7319 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7321 LLT Ty = MRI.getType(DstReg);
7325 unsigned Size = Ty.getSizeInBits();
7327 if (
Size % NarrowSize != 0)
7330 unsigned NumParts =
Size / NarrowSize;
7331 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7332 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7338 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7342 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7343 MI.eraseFromParent();
7353 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7356 LLT SrcTy = MRI.getType(Src);
7367 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7380 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7383 if (SizeOp1 % NarrowSize != 0)
7385 int NumParts = SizeOp1 / NarrowSize;
7388 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7392 uint64_t OpStart =
MI.getOperand(2).getImm();
7393 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7394 for (
int i = 0; i < NumParts; ++i) {
7395 unsigned SrcStart = i * NarrowSize;
7397 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7400 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7408 int64_t ExtractOffset;
7410 if (OpStart < SrcStart) {
7412 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7414 ExtractOffset = OpStart - SrcStart;
7415 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7419 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7421 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7422 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7429 if (MRI.getType(DstReg).isVector())
7430 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7431 else if (DstRegs.
size() > 1)
7432 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7435 MI.eraseFromParent();
7447 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7449 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7452 SrcRegs.
append(LeftoverRegs);
7456 uint64_t OpStart =
MI.getOperand(3).getImm();
7457 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7458 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7459 unsigned DstStart =
I * NarrowSize;
7461 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7469 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7471 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7475 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7483 int64_t ExtractOffset, InsertOffset;
7485 if (OpStart < DstStart) {
7487 ExtractOffset = DstStart - OpStart;
7488 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7490 InsertOffset = OpStart - DstStart;
7493 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7497 if (ExtractOffset != 0 || SegSize != OpSize) {
7499 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7500 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7503 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7504 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7512 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7515 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7517 MI.eraseFromParent();
7525 LLT DstTy = MRI.getType(DstReg);
7527 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7533 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7534 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7538 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7539 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7542 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7543 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7544 {Src0Regs[I], Src1Regs[I]});
7548 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7551 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7552 DstLeftoverRegs.
push_back(Inst.getReg(0));
7555 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7556 LeftoverTy, DstLeftoverRegs);
7558 MI.eraseFromParent();
7568 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7570 LLT DstTy = MRI.getType(DstReg);
7575 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7576 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7577 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7579 MI.eraseFromParent();
7589 Register CondReg =
MI.getOperand(1).getReg();
7590 LLT CondTy = MRI.getType(CondReg);
7591 if (CondTy.isVector())
7595 LLT DstTy = MRI.getType(DstReg);
7601 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7602 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7606 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7607 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7610 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7612 CondReg, Src1Regs[
I], Src2Regs[
I]);
7616 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7618 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7622 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7623 LeftoverTy, DstLeftoverRegs);
7625 MI.eraseFromParent();
7635 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7638 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7639 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON;
7642 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7644 auto C_0 =
B.buildConstant(NarrowTy, 0);
7646 UnmergeSrc.getReg(1), C_0);
7647 auto LoCTLZ = IsUndef ?
B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0))
7648 :
B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7649 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7650 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7651 auto HiCTLZ =
B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1));
7652 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7654 MI.eraseFromParent();
7667 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7670 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7671 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_POISON;
7674 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7676 auto C_0 =
B.buildConstant(NarrowTy, 0);
7678 UnmergeSrc.getReg(0), C_0);
7679 auto HiCTTZ = IsUndef ?
B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1))
7680 :
B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7681 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7682 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7683 auto LoCTTZ =
B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0));
7684 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7686 MI.eraseFromParent();
7699 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7702 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7707 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7711 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7712 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7720 auto LoInv =
B.buildXor(DstTy,
Lo, Sign);
7721 auto LoCTLZ =
B.buildCTLZ(DstTy, LoInv);
7724 auto C_NarrowSizeM1 =
B.buildConstant(DstTy, NarrowSize - 1);
7725 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7727 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7729 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7731 MI.eraseFromParent();
7741 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7744 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7745 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7747 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7748 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7749 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7751 MI.eraseFromParent();
7766 LLT ExpTy = MRI.getType(ExpReg);
7771 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7772 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7773 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7774 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7776 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7778 MI.getOperand(2).setReg(Trunc.getReg(0));
7785 unsigned Opc =
MI.getOpcode();
7788 auto QAction = LI.getAction(Q).Action;
7794 case TargetOpcode::G_CTLZ_ZERO_POISON: {
7797 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7801 case TargetOpcode::G_CTLZ: {
7802 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7803 unsigned Len = SrcTy.getScalarSizeInBits();
7805 if (isSupported({TargetOpcode::G_CTLZ_ZERO_POISON, {DstTy, SrcTy}})) {
7807 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_POISON(DstTy, SrcReg);
7808 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7811 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7812 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7813 MI.eraseFromParent();
7829 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7830 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7833 Op = MIBOp.getReg(0);
7838 MI.eraseFromParent();
7841 case TargetOpcode::G_CTTZ_ZERO_POISON: {
7844 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7848 case TargetOpcode::G_CTTZ: {
7849 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7851 unsigned Len = SrcTy.getScalarSizeInBits();
7852 if (isSupported({TargetOpcode::G_CTTZ_ZERO_POISON, {DstTy, SrcTy}})) {
7855 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_POISON(DstTy, SrcReg);
7856 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7859 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7860 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7861 MI.eraseFromParent();
7868 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7869 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7871 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7872 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7873 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7874 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7877 MI.eraseFromParent();
7881 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7882 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7886 case TargetOpcode::G_CTPOP: {
7888 LLT Ty = MRI.getType(SrcReg);
7889 unsigned Size = Ty.getScalarSizeInBits();
7901 auto C_1 =
B.buildConstant(Ty, 1);
7902 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7904 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7905 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7906 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7910 auto C_2 =
B.buildConstant(Ty, 2);
7911 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7913 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7914 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7915 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7916 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7923 auto C_4 =
B.buildConstant(Ty, 4);
7924 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7925 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7927 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7928 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7930 assert(
Size <= 128 &&
"Scalar size is too large for CTPOP lower algorithm");
7933 if (
Size == 16 && !Ty.isVector()) {
7935 auto C_8 =
B.buildConstant(Ty, 8);
7936 auto HighSum =
B.buildLShr(Ty, B8Count, C_8);
7937 auto Res =
B.buildAdd(Ty, B8Count, HighSum);
7938 B.buildAnd(
MI.getOperand(0).getReg(), Res,
B.buildConstant(Ty, 0xFF));
7939 MI.eraseFromParent();
7948 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7950 auto IsMulSupported = [
this](
const LLT Ty) {
7951 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7954 if (IsMulSupported(Ty)) {
7955 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7956 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7958 auto ResTmp = B8Count;
7959 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7960 auto ShiftC =
B.buildConstant(Ty, Shift);
7961 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7962 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7964 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7966 MI.eraseFromParent();
7969 case TargetOpcode::G_CTLS: {
7970 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7974 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7975 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7977 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7983 MI.eraseFromParent();
8004 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
8005 LLT Ty = MRI.getType(Dst);
8006 LLT ShTy = MRI.getType(Z);
8013 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8014 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8019 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
8020 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
8024 auto One =
MIRBuilder.buildConstant(ShTy, 1);
8037 MI.eraseFromParent();
8043 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
8044 LLT Ty = MRI.getType(Dst);
8045 LLT ShTy = MRI.getType(Z);
8048 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8058 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8059 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8060 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
8061 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
8062 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
8066 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
8069 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
8072 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
8074 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8075 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8076 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
8079 auto One =
MIRBuilder.buildConstant(ShTy, 1);
8081 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
8083 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
8086 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
8087 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
8092 MI.eraseFromParent();
8103 LLT Ty = MRI.getType(Dst);
8104 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
8106 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8107 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8110 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
8111 return lowerFunnelShiftAsShifts(
MI);
8115 if (Result == UnableToLegalize)
8116 return lowerFunnelShiftAsShifts(
MI);
8121 auto [Dst, Src] =
MI.getFirst2Regs();
8122 LLT DstTy = MRI.getType(Dst);
8123 LLT SrcTy = MRI.getType(Src);
8127 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8135 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8139 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8143 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8148 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8149 {UnmergeSrc.getReg(0)});
8150 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8151 {UnmergeSrc.getReg(1)});
8154 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8156 MI.eraseFromParent();
8173 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8177 LLT DstTy = MRI.getType(DstReg);
8178 LLT SrcTy = MRI.getType(SrcReg);
8186 SrcTy.getElementCount().divideCoefficientBy(2));
8199 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8211 MI.eraseFromParent();
8220 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8221 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8222 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8223 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8224 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8225 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8226 MI.eraseFromParent();
8231 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8233 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8234 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8239 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8240 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8242 return lowerRotateWithReverseRotate(
MI);
8245 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8246 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8247 bool IsFShLegal =
false;
8248 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8249 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8253 MI.eraseFromParent();
8258 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8261 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8266 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8267 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8268 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8274 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8275 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8277 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8283 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8284 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8286 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8288 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8293 MI.eraseFromParent();
8301 auto [Dst, Src] =
MI.getFirst2Regs();
8306 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8334 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8347 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8351 MI.eraseFromParent();
8359 auto [Dst, Src] =
MI.getFirst2Regs();
8364 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8377 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8379 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8384 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8386 MI.eraseFromParent();
8394 auto [Dst, Src] =
MI.getFirst2Regs();
8398 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S64);
8409 auto TwoP52 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4330000000000000));
8410 auto TwoP84 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4530000000000000));
8412 auto TwoP52P84FP =
MIRBuilder.buildFConstant(
S64, TwoP52P84);
8419 auto HighBitsFP =
MIRBuilder.buildOr(
S64, TwoP84, HighBits);
8420 auto Scratch =
MIRBuilder.buildFSub(
S64, HighBitsFP, TwoP52P84FP);
8421 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8423 MI.eraseFromParent();
8434 SrcTy.changeElementType(
LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
8435 auto M1 =
MI.getOpcode() == TargetOpcode::G_UITOFP
8441 MI.eraseFromParent();
8446 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8449 auto True =
MIRBuilder.buildFConstant(DstTy, 1.0);
8450 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8451 MIRBuilder.buildSelect(Dst, Src, True, False);
8452 MI.eraseFromParent();
8456 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8476 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8483 auto True =
MIRBuilder.buildFConstant(DstTy, -1.0);
8484 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8485 MIRBuilder.buildSelect(Dst, Src, True, False);
8486 MI.eraseFromParent();
8490 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8496 if (DstTy.getScalarSizeInBits() == 32) {
8503 auto SignBit =
MIRBuilder.buildConstant(I64, 63);
8504 auto S =
MIRBuilder.buildAShr(I64, L, SignBit);
8506 auto LPlusS =
MIRBuilder.buildAdd(I64, L, S);
8513 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8514 MI.eraseFromParent();
8522 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8526 if (SrcTy !=
S64 && SrcTy !=
S32)
8528 if (DstTy !=
S32 && DstTy !=
S64)
8555 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8557 MI.eraseFromParent();
8562 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8567 if (SrcTy.getScalarType() !=
S32 || DstTy.getScalarType() !=
S64)
8574 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8576 auto ExponentMask =
MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8577 auto ExponentLoBit =
MIRBuilder.buildConstant(SrcTy, 23);
8579 auto AndExpMask =
MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8580 auto ExponentBits =
MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8582 auto SignMask =
MIRBuilder.buildConstant(SrcTy,
8584 auto AndSignMask =
MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8585 auto SignLowBit =
MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8586 auto Sign =
MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8589 auto MantissaMask =
MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8590 auto AndMantissaMask =
MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8591 auto K =
MIRBuilder.buildConstant(SrcTy, 0x00800000);
8593 auto R =
MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8596 auto Bias =
MIRBuilder.buildConstant(SrcTy, 127);
8601 auto Shl =
MIRBuilder.buildShl(DstTy, R, SubExponent);
8602 auto Srl =
MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8608 R =
MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8610 auto XorSign =
MIRBuilder.buildXor(DstTy, R, Sign);
8611 auto Ret =
MIRBuilder.buildSub(DstTy, XorSign, Sign);
8613 auto ZeroSrcTy =
MIRBuilder.buildConstant(SrcTy, 0);
8618 auto ZeroDstTy =
MIRBuilder.buildConstant(DstTy, 0);
8619 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8621 MI.eraseFromParent();
8627 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8629 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8630 unsigned SatWidth = DstTy.getScalarSizeInBits();
8634 APInt MinInt, MaxInt;
8657 if (AreExactFloatBounds) {
8659 auto MaxC =
MIRBuilder.buildFConstant(SrcTy, MinFloat);
8662 auto Max =
MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8664 auto MinC =
MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8673 MI.eraseFromParent();
8678 auto FpToInt =
MIRBuilder.buildFPTOSI(DstTy, Min);
8683 MI.eraseFromParent();
8690 auto FpToInt = IsSigned ?
MIRBuilder.buildFPTOSI(DstTy, Src)
8698 DstTy, ULT,
MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8708 MI.eraseFromParent();
8714 DstTy, OGT,
MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8718 MI.eraseFromParent();
8725 assert((
MI.getOpcode() == TargetOpcode::G_FPEXT ||
8726 MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
8727 "Only G_FPEXT and G_FPTRUNC are expected");
8729 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8734 if (
MI.getOpcode() == TargetOpcode::G_FPEXT) {
8736 StoreOpc = TargetOpcode::G_STORE;
8737 LoadOpc = TargetOpcode::G_FPEXTLOAD;
8740 StoreOpc = TargetOpcode::G_FPTRUNCSTORE;
8741 LoadOpc = TargetOpcode::G_LOAD;
8750 StackTy, StackTyAlign);
8751 MIRBuilder.buildStoreInstr(StoreOpc, SrcReg, StackTemp, *StoreMMO);
8754 StackTy, StackTyAlign);
8755 MIRBuilder.buildLoadInstr(LoadOpc, DstReg, StackTemp, *LoadMMO);
8757 MI.eraseFromParent();
8767 auto [Dst, Src] =
MI.getFirst2Regs();
8771 if (MRI.getType(Src).isVector())
8775 unsigned Flags =
MI.getFlags();
8778 MI.eraseFromParent();
8782 const unsigned ExpMask = 0x7ff;
8783 const unsigned ExpBiasf64 = 1023;
8784 const unsigned ExpBiasf16 = 15;
8813 auto SelectCC =
MIRBuilder.buildSelect(
S32, CmpM_NE0, Bits0x200, Zero);
8873 MI.eraseFromParent();
8880 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8890 auto SrcI =
MIRBuilder.buildBitcast(I32Ty, SrcReg);
8912 auto Trunc =
MIRBuilder.buildTrunc(I16Ty, Srl);
8914 MI.eraseFromParent();
8920 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8921 if (DstTy.getScalarType().isFloat16() && SrcTy.getScalarType().isFloat64())
8924 if (DstTy.getScalarType().isBFloat16() && SrcTy.getScalarType().isFloat32())
8931 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8932 LLT Ty = MRI.getType(Dst);
8934 auto CvtSrc1 =
MIRBuilder.buildSITOFP(Ty, Src1);
8935 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1,
MI.getFlags());
8936 MI.eraseFromParent();
8941 auto [DstFrac, DstInt, Src] =
MI.getFirst3Regs();
8942 LLT Ty = MRI.getType(Src);
8943 auto Flags =
MI.getFlags();
8951 FracToUse = FracPart.getReg(0);
8953 auto Abs =
MIRBuilder.buildFAbs(Ty, Src, Flags);
8957 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
8959 FracToUse =
Select.getReg(0);
8962 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8965 MI.eraseFromParent();
8971 case TargetOpcode::G_SMIN:
8973 case TargetOpcode::G_SMAX:
8975 case TargetOpcode::G_UMIN:
8977 case TargetOpcode::G_UMAX:
8985 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8990 auto Cmp =
MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8991 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8993 MI.eraseFromParent();
9002 LLT DstTy = MRI.getType(Dst);
9003 LLT SrcTy = MRI.getType(Cmp->getReg(1));
9013 auto Zero =
MIRBuilder.buildConstant(DstTy, 0);
9014 auto IsGT =
MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
9016 auto IsLT =
MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
9019 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
9020 auto BC = TLI.getBooleanContents(DstTy.
isVector(),
false);
9021 if (TLI.preferSelectsOverBooleanArithmetic(
9024 auto One =
MIRBuilder.buildConstant(DstTy, 1);
9025 auto SelectZeroOrOne =
MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
9027 auto MinusOne =
MIRBuilder.buildConstant(DstTy, -1);
9028 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
9034 unsigned BoolExtOp =
9036 IsGT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
9037 IsLT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
9041 MI.eraseFromParent();
9047 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
9048 const int Src0Size = Src0Ty.getScalarSizeInBits();
9049 const int Src1Size = Src1Ty.getScalarSizeInBits();
9059 if (!(Src0Ty.getScalarType().isAnyScalar() ||
9060 Src0Ty.getScalarType().isInteger()))
9061 Src0Int =
MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);
9063 if (!(Src1Ty.getScalarType().isAnyScalar() ||
9064 Src1Ty.getScalarType().isInteger()))
9065 Src1Int =
MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);
9070 auto NotSignBitMask =
MIRBuilder.buildConstant(
9074 MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
9076 if (Src0Ty == Src1Ty) {
9077 And1 =
MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
9078 }
else if (Src0Size > Src1Size) {
9079 auto ShiftAmt =
MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
9080 auto Zext =
MIRBuilder.buildZExt(Src0IntTy, Src1Int);
9081 auto Shift =
MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
9082 And1 =
MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
9084 auto ShiftAmt =
MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
9085 auto Shift =
MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
9086 auto Trunc =
MIRBuilder.buildTrunc(Src0IntTy, Shift);
9087 And1 =
MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
9093 unsigned Flags =
MI.getFlags();
9098 if (DstTy == DstIntTy)
9099 MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
9105 MI.eraseFromParent();
9116 switch (
MI.getOpcode()) {
9117 case TargetOpcode::G_FMINNUM:
9118 NewOp = TargetOpcode::G_FMINNUM_IEEE;
9120 case TargetOpcode::G_FMINIMUMNUM:
9121 NewOp = TargetOpcode::G_FMINNUM;
9123 case TargetOpcode::G_FMAXNUM:
9124 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
9126 case TargetOpcode::G_FMAXIMUMNUM:
9127 NewOp = TargetOpcode::G_FMAXNUM;
9133 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9134 LLT Ty = MRI.getType(Dst);
9143 if (!VT->isKnownNeverSNaN(Src0))
9144 Src0 =
MIRBuilder.buildFCanonicalize(Ty, Src0,
MI.getFlags()).getReg(0);
9146 if (!VT->isKnownNeverSNaN(Src1))
9147 Src1 =
MIRBuilder.buildFCanonicalize(Ty, Src1,
MI.getFlags()).getReg(0);
9152 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1},
MI.getFlags());
9153 MI.eraseFromParent();
9159 unsigned Opc =
MI.getOpcode();
9160 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9161 LLT Ty = MRI.getType(Dst);
9164 bool IsMax = (
Opc == TargetOpcode::G_FMAXIMUM);
9166 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
9167 unsigned OpcNonIeee =
9168 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
9169 bool MinMaxMustRespectOrderedZero =
false;
9173 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
9175 MinMaxMustRespectOrderedZero =
true;
9176 }
else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
9181 Res =
MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
9186 (!VT->isKnownNeverNaN(Src0) || !VT->isKnownNeverNaN(Src1))) {
9189 LLT ElementTy = Ty.
isScalar() ? Ty : Ty.getElementType();
9193 NaN =
MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
9195 Res =
MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
9205 const unsigned Flags =
MI.getFlags();
9211 auto LHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
9213 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
9215 auto RHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
9217 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9219 Res =
MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9224 MI.eraseFromParent();
9231 LLT Ty = MRI.getType(DstReg);
9232 unsigned Flags =
MI.getFlags();
9237 MI.eraseFromParent();
9243 auto [DstReg,
X] =
MI.getFirst2Regs();
9244 const unsigned Flags =
MI.getFlags();
9245 const LLT Ty = MRI.getType(DstReg);
9257 auto AbsDiff =
MIRBuilder.buildFAbs(Ty, Diff, Flags);
9259 auto Half =
MIRBuilder.buildFConstant(Ty, 0.5);
9264 auto One =
MIRBuilder.buildFConstant(Ty, 1.0);
9265 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9266 auto BoolFP =
MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9267 auto SignedOffset =
MIRBuilder.buildFCopysign(Ty, BoolFP,
X);
9269 MIRBuilder.buildFAdd(DstReg,
T, SignedOffset, Flags);
9271 MI.eraseFromParent();
9276 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
9277 unsigned Flags =
MI.getFlags();
9278 LLT Ty = MRI.getType(DstReg);
9285 auto Trunc =
MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9286 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9289 SrcReg, Zero, Flags);
9291 SrcReg, Trunc, Flags);
9295 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9296 MI.eraseFromParent();
9302 const unsigned NumOps =
MI.getNumOperands();
9303 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
9304 unsigned PartSize = Src0Ty.getSizeInBits();
9309 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
9310 const unsigned Offset = (
I - 1) * PartSize;
9313 auto ZextInput =
MIRBuilder.buildZExt(WideTy, SrcReg);
9316 MRI.createGenericVirtualRegister(WideTy);
9319 auto Shl =
MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9320 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9321 ResultReg = NextResult;
9324 if (DstTy.isPointer()) {
9325 if (
MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9326 DstTy.getAddressSpace())) {
9334 MI.eraseFromParent();
9340 const unsigned NumDst =
MI.getNumOperands() - 1;
9341 Register SrcReg =
MI.getOperand(NumDst).getReg();
9342 Register Dst0Reg =
MI.getOperand(0).getReg();
9343 LLT DstTy = MRI.getType(Dst0Reg);
9352 LLT IntTy = MRI.getType(SrcReg);
9357 unsigned Offset = DstSize;
9358 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
9360 auto Shift =
MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9364 MI.eraseFromParent();
9383 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9384 InsertVal =
MI.getOperand(2).getReg();
9386 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
9388 LLT VecTy = MRI.getType(SrcVec);
9398 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
9399 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9401 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9404 MI.eraseFromParent();
9409 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
9420 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9427 int64_t
Offset = IdxVal * EltBytes;
9438 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9441 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9443 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9446 MI.eraseFromParent();
9452 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9453 MI.getFirst3RegLLTs();
9463 for (
int Idx : Mask) {
9465 if (!
Undef.isValid())
9471 assert(!Src0Ty.isScalar() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9473 int NumElts = Src0Ty.getNumElements();
9474 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9475 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9476 auto [It, Inserted] = CachedExtract.
try_emplace(Idx);
9478 auto IdxK =
MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9480 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9485 assert(DstTy.isVector() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9486 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9487 MI.eraseFromParent();
9493 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9494 MI.getFirst4RegLLTs();
9496 if (VecTy.isScalableVector())
9512 auto OutPos =
MIRBuilder.buildConstant(IdxTy, 0);
9515 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9518 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9521 std::optional<APInt> PassthruSplatVal =
9524 if (PassthruSplatVal.has_value()) {
9526 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9527 }
else if (HasPassthru) {
9528 auto Popcount =
MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9529 Popcount =
MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9535 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9539 unsigned NumElmts = VecTy.getNumElements();
9540 for (
unsigned I = 0;
I < NumElmts; ++
I) {
9542 auto Val =
MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9545 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9548 auto MaskI =
MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9553 OutPos =
MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9555 if (HasPassthru &&
I == NumElmts - 1) {
9558 auto AllLanesSelected =
MIRBuilder.buildICmp(
9560 OutPos =
MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9561 {OutPos, EndOfVector});
9565 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9567 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9572 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9574 MI.eraseFromParent();
9591 if (Alignment >
Align(1)) {
9603 const auto &MF = *
MI.getMF();
9609 Register AllocSize =
MI.getOperand(1).getReg();
9612 LLT PtrTy = MRI.getType(Dst);
9613 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9620 MI.eraseFromParent();
9626 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9631 MI.eraseFromParent();
9637 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9642 MI.eraseFromParent();
9648 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9649 unsigned Offset =
MI.getOperand(2).getImm();
9652 if (SrcTy.isVector()) {
9653 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9654 unsigned DstSize = DstTy.getSizeInBits();
9656 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9657 (
Offset + DstSize <= SrcTy.getSizeInBits())) {
9659 auto Unmerge =
MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9663 for (
unsigned Idx =
Offset / SrcEltSize;
9664 Idx < (
Offset + DstSize) / SrcEltSize; ++Idx) {
9665 SubVectorElts.
push_back(Unmerge.getReg(Idx));
9667 if (SubVectorElts.
size() == 1)
9668 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9670 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9672 MI.eraseFromParent();
9678 if ((SrcTy.isPointer() &&
9679 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9680 (DstTy.isPointer() &&
9681 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9682 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9686 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9687 (SrcTy.isScalar() || SrcTy.isPointer() ||
9688 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9689 LLT SrcIntTy = SrcTy;
9690 if (!SrcTy.isScalar()) {
9692 SrcReg =
MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9696 if (DstTy.isPointer())
9698 MRI.createGenericVirtualRegister(
LLT::scalar(DstTy.getSizeInBits()));
9704 auto Shr =
MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9708 if (DstTy.isPointer())
9711 MI.eraseFromParent();
9719 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
9722 LLT DstTy = MRI.getType(Src);
9723 LLT InsertTy = MRI.getType(InsertSrc);
9726 bool IsNonIntegralInsert =
9736 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9737 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9744 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9746 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, Src);
9750 for (; Idx <
Offset / EltSize; ++Idx) {
9751 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9756 auto UnmergeInsertSrc =
MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9757 for (
unsigned i = 0; Idx < (
Offset + InsertSize) / EltSize;
9759 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
9763 InsertSrc =
MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9765 InsertSrc =
MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9772 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9775 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9776 MI.eraseFromParent();
9785 if (IsNonIntegralDst || IsNonIntegralInsert) {
9786 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9790 LLT IntDstTy = DstTy;
9794 Src =
MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9799 InsertSrc =
MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9805 ExtInsSrc =
MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9811 auto Mask =
MIRBuilder.buildConstant(IntDstTy, MaskVal);
9812 auto MaskedSrc =
MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9813 auto Or =
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9816 MI.eraseFromParent();
9822 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9823 MI.getFirst4RegLLTs();
9824 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
9827 LLT BoolTy = Dst1Ty;
9829 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9844 auto ResultLowerThanLHS =
9848 MIRBuilder.buildXor(Dst1, RHSNegative, ResultLowerThanLHS);
9852 auto LHSLessThanRHS =
9854 auto ResultNegative =
9856 MIRBuilder.buildXor(Dst1, LHSLessThanRHS, ResultNegative);
9860 MI.eraseFromParent();
9866 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9867 const LLT Ty = MRI.getType(Res);
9870 auto Tmp =
MIRBuilder.buildAdd(Ty, LHS, RHS);
9871 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9872 auto Sum =
MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9883 MI.eraseFromParent();
9888 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9889 const LLT Ty = MRI.getType(Res);
9892 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9893 auto RHSPlusCI =
MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9894 auto Diff =
MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9899 auto X2 =
MIRBuilder.buildXor(Ty, LHS, Diff);
9904 MI.eraseFromParent();
9910 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9911 LLT Ty = MRI.getType(Res);
9915 switch (
MI.getOpcode()) {
9918 case TargetOpcode::G_UADDSAT:
9921 BaseOp = TargetOpcode::G_ADD;
9923 case TargetOpcode::G_SADDSAT:
9926 BaseOp = TargetOpcode::G_ADD;
9928 case TargetOpcode::G_USUBSAT:
9931 BaseOp = TargetOpcode::G_SUB;
9933 case TargetOpcode::G_SSUBSAT:
9936 BaseOp = TargetOpcode::G_SUB;
9951 uint64_t NumBits = Ty.getScalarSizeInBits();
9962 auto NegOne =
MIRBuilder.buildConstant(Ty, -1);
9970 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9975 auto Min =
MIRBuilder.buildUMin(Ty, Not, RHS);
9976 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9979 MI.eraseFromParent();
9985 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9986 LLT Ty = MRI.getType(Res);
9990 unsigned OverflowOp;
9991 switch (
MI.getOpcode()) {
9994 case TargetOpcode::G_UADDSAT:
9997 OverflowOp = TargetOpcode::G_UADDO;
9999 case TargetOpcode::G_SADDSAT:
10002 OverflowOp = TargetOpcode::G_SADDO;
10004 case TargetOpcode::G_USUBSAT:
10007 OverflowOp = TargetOpcode::G_USUBO;
10009 case TargetOpcode::G_SSUBSAT:
10012 OverflowOp = TargetOpcode::G_SSUBO;
10017 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
10018 Register Tmp = OverflowRes.getReg(0);
10019 Register Ov = OverflowRes.getReg(1);
10028 uint64_t NumBits = Ty.getScalarSizeInBits();
10029 auto ShiftAmount =
MIRBuilder.buildConstant(Ty, NumBits - 1);
10030 auto Sign =
MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
10033 Clamp =
MIRBuilder.buildAdd(Ty, Sign, MinVal);
10041 Clamp =
MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
10043 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
10045 MI.eraseFromParent();
10051 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
10052 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
10053 "Expected shlsat opcode!");
10054 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
10055 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
10056 LLT Ty = MRI.getType(Res);
10060 auto Result =
MIRBuilder.buildShl(Ty, LHS, RHS);
10061 auto Orig = IsSigned ?
MIRBuilder.buildAShr(Ty, Result, RHS)
10070 SatVal =
MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
10075 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
10077 MI.eraseFromParent();
10082 auto [Dst, Src] =
MI.getFirst2Regs();
10083 const LLT Ty = MRI.getType(Src);
10084 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
10085 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
10088 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt);
10089 auto LSByteShiftedLeft =
MIRBuilder.buildShl(Ty, Src, ShiftAmt);
10090 auto MSByteShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10091 auto Res =
MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
10094 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
10097 auto Mask =
MIRBuilder.buildConstant(Ty, APMask);
10098 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
10100 auto LoByte =
MIRBuilder.buildAnd(Ty, Src, Mask);
10101 auto LoShiftedLeft =
MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
10102 Res =
MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
10104 auto SrcShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10105 auto HiShiftedRight =
MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
10106 Res =
MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
10108 Res.getInstr()->getOperand(0).setReg(Dst);
10110 MI.eraseFromParent();
10117 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
10120 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
10121 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
10122 return B.buildOr(Dst,
LHS,
RHS);
10127 auto [Dst, Src] =
MI.getFirst2Regs();
10128 const LLT SrcTy = MRI.getType(Src);
10129 unsigned Size = SrcTy.getScalarSizeInBits();
10130 unsigned VSize = SrcTy.getSizeInBits();
10133 if (SrcTy.isVector() && (VSize % 8 == 0) &&
10134 (LI.isLegal({TargetOpcode::G_BITREVERSE,
10135 {LLT::fixed_vector(VSize / 8, LLT::integer(8)),
10136 LLT::fixed_vector(VSize / 8, LLT::integer(8))}}))) {
10141 auto BSWAP =
MIRBuilder.buildBSwap(SrcTy, Src);
10142 auto Cast =
MIRBuilder.buildBitcast(VTy, BSWAP);
10143 auto RBIT =
MIRBuilder.buildBitReverse(VTy, Cast);
10147 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
10170 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
10174 Tmp2 = MIRBuilder.
buildShl(SrcTy, Src, ShAmt);
10177 Tmp2 = MIRBuilder.
buildLShr(SrcTy, Src, ShAmt);
10181 Tmp2 = MIRBuilder.
buildAnd(SrcTy, Tmp2, Mask);
10185 Tmp = MIRBuilder.
buildOr(SrcTy, Tmp, Tmp2);
10190 MI.eraseFromParent();
10198 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
10199 int NameOpIdx = IsRead ? 1 : 0;
10200 int ValRegIndex = IsRead ? 0 : 1;
10202 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
10203 const LLT Ty = MRI.getType(ValReg);
10205 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
10212 (IsRead ?
"llvm.read_register" :
"llvm.write_register"),
10213 Fn,
MI.getDebugLoc()));
10217 MI.eraseFromParent();
10226 MI.eraseFromParent();
10232 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
10233 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10234 Register Result =
MI.getOperand(0).getReg();
10235 LLT OrigTy = MRI.getType(Result);
10239 auto LHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(1)});
10240 auto RHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(2)});
10242 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10244 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, SizeInBits);
10245 auto Shifted =
MIRBuilder.buildInstr(ShiftOp, {WideTy}, {
Mul, ShiftAmt});
10248 MI.eraseFromParent();
10254 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10259 MI.eraseFromParent();
10264 MI.eraseFromParent();
10271 unsigned BitSize = SrcTy.getScalarSizeInBits();
10275 auto AsInt = SrcTy == IntTy ?
MIRBuilder.buildCopy(IntTy, SrcReg)
10282 APInt ExpMask = Inf;
10284 APInt QNaNBitMask =
10288 auto SignBitC =
MIRBuilder.buildConstant(IntTy, SignBit);
10289 auto ValueMaskC =
MIRBuilder.buildConstant(IntTy, ValueMask);
10290 auto InfC =
MIRBuilder.buildConstant(IntTy, Inf);
10291 auto ExpMaskC =
MIRBuilder.buildConstant(IntTy, ExpMask);
10292 auto ZeroC =
MIRBuilder.buildConstant(IntTy, 0);
10294 auto Abs =
MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10298 auto Res =
MIRBuilder.buildConstant(DstTy, 0);
10300 LLT DstTyCopy = DstTy;
10302 Res =
MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10330 auto ExpBits =
MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10333 Mask &= ~PartialCheck;
10342 else if (PartialCheck ==
fcZero)
10354 auto OneC =
MIRBuilder.buildConstant(IntTy, 1);
10355 auto VMinusOne =
MIRBuilder.buildSub(IntTy, V, OneC);
10356 auto SubnormalRes =
10358 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10360 SubnormalRes =
MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10361 appendToRes(SubnormalRes);
10368 else if (PartialCheck ==
fcInf)
10373 auto NegInfC =
MIRBuilder.buildConstant(IntTy, NegInf);
10380 auto InfWithQnanBitC =
MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10381 if (PartialCheck ==
fcNan) {
10385 }
else if (PartialCheck ==
fcQNan) {
10395 Abs, InfWithQnanBitC);
10396 appendToRes(
MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10403 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
10405 IntTy, Abs,
MIRBuilder.buildConstant(IntTy, ExpLSB));
10406 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10409 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10411 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10414 DstTy, Sign,
MIRBuilder.buildConstant(DstTy, InversionMask));
10415 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10417 appendToRes(NormalRes);
10421 MI.eraseFromParent();
10427 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10428 MI.getFirst4RegLLTs();
10437 Op1Reg =
MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10438 Op1Ty = MRI.getType(Op1Reg);
10439 Op2Reg =
MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10440 Op2Ty = MRI.getType(Op2Reg);
10444 if (MaskTy.isScalar()) {
10452 MaskElt =
MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10455 MaskTy = DstTy.changeElementType(
LLT::integer(DstTy.getScalarSizeInBits()));
10457 MIRBuilder.buildSExtOrTrunc(MaskTy.getScalarType(), MaskElt).getReg(0);
10459 if (DstTy.isVector()) {
10461 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MaskTy, MaskElt);
10462 MaskReg = ShufSplat.getReg(0);
10466 }
else if (!DstTy.isVector()) {
10471 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10475 if (!Op1Ty.getScalarType().isAnyScalar() &&
10476 !Op1Ty.getScalarType().isInteger())
10477 Op1Reg =
MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);
10479 if (!Op2Ty.getScalarType().isAnyScalar() &&
10480 !Op2Ty.getScalarType().isInteger()) {
10482 Op2Ty.changeElementType(
LLT::integer(Op2Ty.getScalarSizeInBits()));
10483 Op2Reg =
MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
10486 auto NotMask =
MIRBuilder.buildNot(MaskTy, MaskReg);
10487 auto NewOp1 =
MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10488 auto NewOp2 =
MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10493 if (DstTy == Op1TyInt)
10496 auto Or =
MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
10500 MI.eraseFromParent();
10506 unsigned Opcode =
MI.getOpcode();
10509 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10510 : TargetOpcode::G_UDIV,
10511 {
MI.getOperand(0).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10513 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10514 : TargetOpcode::G_UREM,
10515 {
MI.getOperand(1).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10516 MI.eraseFromParent();
10526 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
10530 auto Shift =
MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10533 MI.eraseFromParent();
10543 Register SrcReg =
MI.getOperand(1).getReg();
10544 LLT Ty = MRI.getType(SrcReg);
10545 auto Zero =
MIRBuilder.buildConstant(Ty, 0);
10548 MI.eraseFromParent();
10554 Register SrcReg =
MI.getOperand(1).getReg();
10555 Register DestReg =
MI.getOperand(0).getReg();
10557 auto Zero =
MIRBuilder.buildConstant(Ty, 0).getReg(0);
10558 auto Sub =
MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10561 MI.eraseFromParent();
10567 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10568 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10569 "Expected G_ABDS or G_ABDU instruction");
10571 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10572 LLT Ty = MRI.getType(LHS);
10582 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10584 MI.eraseFromParent();
10590 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10591 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10592 "Expected G_ABDS or G_ABDU instruction");
10594 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10595 LLT Ty = MRI.getType(LHS);
10600 if (
MI.getOpcode() == TargetOpcode::G_ABDS) {
10601 MaxReg =
MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10602 MinReg =
MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10604 MaxReg =
MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10605 MinReg =
MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10607 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10609 MI.eraseFromParent();
10614 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10619 if (!(SrcTy.getScalarType().isAnyScalar() ||
10620 SrcTy.getScalarType().isInteger())) {
10622 SrcTy.changeElementType(
LLT::integer(SrcTy.getScalarSizeInBits()));
10623 CastedSrc =
MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
10626 if (MRI.getType(DstReg) != TyInt) {
10630 .buildAnd(TyInt, CastedSrc,
10633 DstTy.getScalarSizeInBits())))
10645 MI.eraseFromParent();
10651 Register SrcReg =
MI.getOperand(1).getReg();
10652 LLT SrcTy = MRI.getType(SrcReg);
10653 LLT DstTy = MRI.getType(SrcReg);
10656 if (SrcTy.isScalar()) {
10661 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::COPY));
10672 Register ListPtr =
MI.getOperand(1).getReg();
10673 LLT PtrTy = MRI.getType(ListPtr);
10680 auto VAList =
MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10682 const Align A(
MI.getOperand(2).getImm());
10684 if (
A > TLI.getMinStackArgumentAlignment()) {
10686 MIRBuilder.buildConstant(PtrTyAsScalarTy,
A.value() - 1).getReg(0);
10687 auto AddDst =
MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10688 auto AndDst =
MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst,
Log2(
A));
10689 VAList = AndDst.getReg(0);
10696 LLT LLTTy = MRI.getType(Dst);
10699 MIRBuilder.buildConstant(PtrTyAsScalarTy,
DL.getTypeAllocSize(Ty));
10700 auto Succ =
MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10705 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10707 Align EltAlignment =
DL.getABITypeAlign(Ty);
10710 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10712 MI.eraseFromParent();
10717 [[maybe_unused]]
unsigned OpCode =
MI.getOpcode();
10718 assert((OpCode == TargetOpcode::G_SMULFIX ||
10719 OpCode == TargetOpcode::G_UMULFIX) &&
10720 "Operator must be either G_SMULFIX or G_UMULFIX!");
10721 auto [Dst, LHS, RHS] =
MI.getFirst3Regs();
10722 LLT Ty = MRI.getType(Dst);
10723 unsigned Scale =
MI.getOperand(3).getImm();
10727 MI.eraseFromParent();
10733 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, Scale);
10735 if (
MI.getOpcode() == TargetOpcode::G_SMULFIX) {
10744 if (
MI.getOpcode() == TargetOpcode::G_SMULFIX)
10751 MI.eraseFromParent();
10758 unsigned NumBits = Ty.getScalarSizeInBits();
10760 if (!Ty.isVector() && ValVRegAndVal) {
10761 APInt Scalar = ValVRegAndVal->Value.
trunc(8);
10769 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10790 uint64_t KnownLen,
Align Alignment,
10792 auto &MF = *
MI.getParent()->getParent();
10797 assert(KnownLen != 0 &&
"Have a zero length memset length!");
10798 assert(!MemOps.
empty() &&
"Expected at least one memory op");
10801 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10802 const auto &DstMMO = **
MI.memoperands_begin();
10804 if (DstAlignCanChange) {
10807 Align NewAlign =
DL.getABITypeAlign(IRTy);
10808 if (NewAlign > Alignment) {
10809 Alignment = NewAlign;
10817 MachineIRBuilder MIB(
MI);
10819 LLT LargestTy = MemOps[0];
10820 for (
unsigned i = 1; i < MemOps.
size(); i++)
10822 LargestTy = MemOps[i];
10834 LLT PtrTy = MRI.getType(Dst);
10835 unsigned DstOff = 0;
10836 unsigned Size = KnownLen;
10837 for (
unsigned I = 0;
I < MemOps.
size();
I++) {
10838 LLT Ty = MemOps[
I];
10840 if (TySize >
Size) {
10844 DstOff -= TySize -
Size;
10854 TLI.isTruncateFree(LargestVT, VT))
10855 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10868 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst,
Offset).getReg(0);
10871 MIB.buildStore(
Value, Ptr, *StoreMMO);
10876 MI.eraseFromParent();
10882 uint64_t KnownLen,
Align Alignment,
10884 auto &MF = *
MI.getParent()->getParent();
10888 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
10889 assert(!MemOps.
empty() &&
"Expected at least one memory op");
10892 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10898 const auto &DstMMO = **
MI.memoperands_begin();
10899 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10901 if (DstAlignCanChange) {
10904 Align NewAlign =
DL.getABITypeAlign(IRTy);
10909 if (!
TRI->hasStackRealignment(MF))
10910 if (MaybeAlign StackAlign =
DL.getStackAlignment())
10911 NewAlign = std::min(NewAlign, *StackAlign);
10913 if (NewAlign > Alignment) {
10914 Alignment = NewAlign;
10922 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
10924 MachineIRBuilder MIB(
MI);
10930 unsigned CurrOffset = 0;
10931 unsigned Size = KnownLen;
10932 for (
auto CopyTy : MemOps) {
10935 if (CopyTy.getSizeInBytes() >
Size)
10936 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
10947 if (CurrOffset != 0) {
10948 LLT SrcTy = MRI.getType(Src);
10952 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
10954 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10958 if (CurrOffset != 0) {
10959 LLT DstTy = MRI.getType(Dst);
10960 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
10962 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10963 CurrOffset += CopyTy.getSizeInBytes();
10964 Size -= CopyTy.getSizeInBytes();
10967 MI.eraseFromParent();
10973 uint64_t KnownLen,
Align Alignment,
10975 auto &MF = *
MI.getParent()->getParent();
10979 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
10980 assert(!MemOps.
empty() &&
"Expected at least one memory op");
10983 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10984 const auto &DstMMO = **
MI.memoperands_begin();
10985 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10987 if (DstAlignCanChange) {
10990 Align NewAlign =
DL.getABITypeAlign(IRTy);
10995 if (!
TRI->hasStackRealignment(MF))
10996 if (MaybeAlign StackAlign =
DL.getStackAlignment())
10997 NewAlign = std::min(NewAlign, *StackAlign);
10999 if (NewAlign > Alignment) {
11000 Alignment = NewAlign;
11008 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
11010 MachineIRBuilder MIB(
MI);
11014 unsigned CurrOffset = 0;
11015 SmallVector<Register, 16> LoadVals;
11016 for (
auto CopyTy : MemOps) {
11023 if (CurrOffset != 0) {
11024 LLT SrcTy = MRI.getType(Src);
11027 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11029 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
11030 CurrOffset += CopyTy.getSizeInBytes();
11034 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
11035 LLT CopyTy = MemOps[
I];
11041 if (CurrOffset != 0) {
11042 LLT DstTy = MRI.getType(Dst);
11045 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11047 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
11050 MI.eraseFromParent();
11057 const unsigned Opc =
MI.getOpcode();
11058 assert((
Opc == TargetOpcode::G_MEMCPY ||
11059 Opc == TargetOpcode::G_MEMCPY_INLINE ||
11060 Opc == TargetOpcode::G_MEMMOVE ||
Opc == TargetOpcode::G_MEMSET ||
11061 Opc == TargetOpcode::G_MEMSET_INLINE) &&
11062 "Expected memcpy like instruction");
11064 if (KnownLen == 0) {
11065 MI.eraseFromParent();
11069 if (
Opc == TargetOpcode::G_MEMCPY ||
Opc == TargetOpcode::G_MEMCPY_INLINE) {
11070 return lowerMemcpy(
MI, Dst, Src, KnownLen, Alignment, DstAlignCanChange,
11073 if (
Opc == TargetOpcode::G_MEMMOVE)
11074 return lowerMemmove(
MI, Dst, Src, KnownLen, Alignment, DstAlignCanChange,
11076 if (
Opc == TargetOpcode::G_MEMSET ||
Opc == TargetOpcode::G_MEMSET_INLINE)
11077 return lowerMemset(
MI, Dst, Src, KnownLen, Alignment, DstAlignCanChange,
11087 bool DstAlignCanChange;
11088 std::vector<LLT> MemOps;
11090 DstAlignCanChange, MemOps))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getValueReg() const
Get the stored value register.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents an insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a three-way compare.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e. the number of whole bytes needed to represent the size in bits.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, Register Dst, Register Src, uint64_t KnownLen, Align Alignment, bool DstAlignCanChange, ArrayRef< LLT > MemOps)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F32_TO_BF16(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMulfix(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
Zip iterator for two or more iterable types.
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
LLVM_ABI bool canLowerMemCpyFamily(const MachineInstr &MI, const MachineRegisterInfo &MRI, unsigned MaxLen, Register &Dst, Register &Src, uint64_t &KnownLen, Align &Alignment, bool &DstAlignCanChange, std::vector< LLT > &MemOps)
Matcher for memcpy-like instructions.
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
@ Custom
The result value requires a custom uniformity check.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.