44#define DEBUG_TYPE "legalizer"
57static std::pair<int, int>
63 unsigned NumParts =
Size / NarrowSize;
64 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
67 if (LeftoverSize == 0)
72 if (LeftoverSize % EltSize != 0)
81 return std::make_pair(NumParts, NumLeftover);
89 switch (Ty.getSizeInBits()) {
130 auto Step = LI.getAction(
MI, MRI);
131 switch (Step.Action) {
146 return bitcast(
MI, Step.TypeIdx, Step.NewType);
149 return lower(
MI, Step.TypeIdx, Step.NewType);
158 return LI.legalizeCustom(*
this,
MI, LocObserver) ?
Legalized
166void LegalizerHelper::insertParts(
Register DstReg,
188 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
190 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
191 return mergeMixedSubvectors(DstReg, AllRegs);
197 extractGCDType(GCDRegs, GCDTy, PartReg);
198 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
199 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
204 LLT Ty = MRI.getType(
Reg);
212void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
215 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
216 appendVectorElts(AllElts, PartRegs[i]);
219 if (!MRI.getType(Leftover).isVector())
222 appendVectorElts(AllElts, Leftover);
224 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
230 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
232 const int StartIdx = Regs.
size();
233 const int NumResults =
MI.getNumOperands() - 1;
235 for (
int I = 0;
I != NumResults; ++
I)
236 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
241 LLT SrcTy = MRI.getType(SrcReg);
242 if (SrcTy == GCDTy) {
248 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
255 LLT SrcTy = MRI.getType(SrcReg);
257 extractGCDType(Parts, GCDTy, SrcReg);
261LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
263 unsigned PadStrategy) {
268 int NumOrigSrc = VRegs.
size();
274 if (NumOrigSrc < NumParts * NumSubParts) {
275 if (PadStrategy == TargetOpcode::G_ZEXT)
276 PadReg =
MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
277 else if (PadStrategy == TargetOpcode::G_ANYEXT)
278 PadReg =
MIRBuilder.buildUndef(GCDTy).getReg(0);
280 assert(PadStrategy == TargetOpcode::G_SEXT);
285 PadReg =
MIRBuilder.buildAShr(GCDTy, VRegs.
back(), ShiftAmt).getReg(0);
301 for (
int I = 0;
I != NumParts; ++
I) {
302 bool AllMergePartsArePadding =
true;
305 for (
int J = 0; J != NumSubParts; ++J) {
306 int Idx =
I * NumSubParts + J;
307 if (Idx >= NumOrigSrc) {
308 SubMerge[J] = PadReg;
312 SubMerge[J] = VRegs[Idx];
315 AllMergePartsArePadding =
false;
321 if (AllMergePartsArePadding && !AllPadReg) {
322 if (PadStrategy == TargetOpcode::G_ANYEXT)
323 AllPadReg =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
324 else if (PadStrategy == TargetOpcode::G_ZEXT)
325 AllPadReg =
MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
334 Remerge[
I] = AllPadReg;
338 if (NumSubParts == 1)
339 Remerge[
I] = SubMerge[0];
341 Remerge[
I] =
MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
344 if (AllMergePartsArePadding && !AllPadReg)
345 AllPadReg = Remerge[
I];
348 VRegs = std::move(Remerge);
352void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
354 LLT DstTy = MRI.getType(DstReg);
359 if (DstTy == LCMTy) {
360 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
364 auto Remerge =
MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
373 UnmergeDefs[0] = DstReg;
374 for (
unsigned I = 1;
I != NumDefs; ++
I)
375 UnmergeDefs[
I] = MRI.createGenericVirtualRegister(DstTy);
378 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
386#define RTLIBCASE_INT(LibcallPrefix) \
390 return RTLIB::LibcallPrefix##32; \
392 return RTLIB::LibcallPrefix##64; \
394 return RTLIB::LibcallPrefix##128; \
396 llvm_unreachable("unexpected size"); \
400#define RTLIBCASE(LibcallPrefix) \
404 return RTLIB::LibcallPrefix##32; \
406 return RTLIB::LibcallPrefix##64; \
408 return RTLIB::LibcallPrefix##80; \
410 return RTLIB::LibcallPrefix##128; \
412 llvm_unreachable("unexpected size"); \
417 case TargetOpcode::G_LROUND:
419 case TargetOpcode::G_LLROUND:
421 case TargetOpcode::G_MUL:
423 case TargetOpcode::G_SDIV:
425 case TargetOpcode::G_UDIV:
427 case TargetOpcode::G_SREM:
429 case TargetOpcode::G_UREM:
431 case TargetOpcode::G_CTLZ_ZERO_POISON:
433 case TargetOpcode::G_FADD:
435 case TargetOpcode::G_FSUB:
437 case TargetOpcode::G_FMUL:
439 case TargetOpcode::G_FDIV:
441 case TargetOpcode::G_FEXP:
443 case TargetOpcode::G_FEXP2:
445 case TargetOpcode::G_FEXP10:
447 case TargetOpcode::G_FREM:
449 case TargetOpcode::G_FPOW:
451 case TargetOpcode::G_FPOWI:
453 case TargetOpcode::G_FMA:
455 case TargetOpcode::G_FSIN:
457 case TargetOpcode::G_FCOS:
459 case TargetOpcode::G_FTAN:
461 case TargetOpcode::G_FASIN:
463 case TargetOpcode::G_FACOS:
465 case TargetOpcode::G_FATAN:
467 case TargetOpcode::G_FATAN2:
469 case TargetOpcode::G_FSINH:
471 case TargetOpcode::G_FCOSH:
473 case TargetOpcode::G_FTANH:
475 case TargetOpcode::G_FSINCOS:
477 case TargetOpcode::G_FMODF:
479 case TargetOpcode::G_FLOG10:
481 case TargetOpcode::G_FLOG:
483 case TargetOpcode::G_FLOG2:
485 case TargetOpcode::G_FLDEXP:
487 case TargetOpcode::G_FCEIL:
489 case TargetOpcode::G_FFLOOR:
491 case TargetOpcode::G_FMINNUM:
493 case TargetOpcode::G_FMAXNUM:
495 case TargetOpcode::G_FMINIMUMNUM:
497 case TargetOpcode::G_FMAXIMUMNUM:
499 case TargetOpcode::G_FSQRT:
501 case TargetOpcode::G_FRINT:
503 case TargetOpcode::G_FNEARBYINT:
505 case TargetOpcode::G_INTRINSIC_TRUNC:
507 case TargetOpcode::G_INTRINSIC_ROUND:
509 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
511 case TargetOpcode::G_INTRINSIC_LRINT:
513 case TargetOpcode::G_INTRINSIC_LLRINT:
533 AttributeList CallerAttrs =
F.getAttributes();
534 if (AttrBuilder(
F.getContext(), CallerAttrs.getRetAttrs())
535 .removeAttribute(Attribute::NoAlias)
536 .removeAttribute(Attribute::NonNull)
541 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
542 CallerAttrs.hasRetAttr(Attribute::SExt))
553 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
560 if (!VReg.
isVirtual() || VReg !=
Next->getOperand(1).getReg())
568 if (Ret ==
MBB.instr_end() || !Ret->isReturn())
571 if (Ret->getNumImplicitOperands() != 1)
574 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
591 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
596 Info.OrigRet = Result;
599 (Result.Ty->isVoidTy() ||
600 Result.Ty ==
MIRBuilder.getMF().getFunction().getReturnType()) &&
608 if (
MI && Info.LoweredTailCall) {
609 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
619 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
620 "Expected instr following MI to be return or debug inst?");
623 Next->eraseFromParent();
624 }
while (
MI->getNextNode());
639 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(
Libcall);
640 if (LibcallImpl == RTLIB::Unsupported)
644 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
658 Args.push_back({MO.getReg(), OpType, 0});
677 unsigned AddrSpace =
DL.getAllocaAddrSpace();
695 if (LibcallResult != LegalizeResult::Legalized)
703 MIRBuilder.
buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
704 MIRBuilder.
buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
705 MI.eraseFromParent();
720 LLT DstTy = MRI.getType(DstFrac);
725 unsigned AddrSpace =
DL.getAllocaAddrSpace();
726 MachinePointerInfo PtrInfo;
735 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
738 if (LibcallResult != LegalizeResult::Legalized)
744 MIRBuilder.
buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
745 MI.eraseFromParent();
756 case TargetOpcode::G_FPEXT:
758 case TargetOpcode::G_FPTRUNC:
760 case TargetOpcode::G_FPTOSI:
762 case TargetOpcode::G_FPTOUI:
764 case TargetOpcode::G_SITOFP:
766 case TargetOpcode::G_UITOFP:
776 if (FromType->isIntegerTy()) {
777 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
778 Arg.
Flags[0].setSExt();
780 Arg.
Flags[0].setZExt();
791 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
795 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
799 LLT OpLLT = MRI.getType(Reg);
800 Type *OpTy =
nullptr;
805 Args.push_back({Reg, OpTy, 0});
808 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
809 RTLIB::Libcall RTLibcall;
810 unsigned Opc =
MI.getOpcode();
812 case TargetOpcode::G_BZERO:
813 RTLibcall = RTLIB::BZERO;
815 case TargetOpcode::G_MEMCPY:
816 RTLibcall = RTLIB::MEMCPY;
817 Args[0].Flags[0].setReturned();
819 case TargetOpcode::G_MEMMOVE:
820 RTLibcall = RTLIB::MEMMOVE;
821 Args[0].Flags[0].setReturned();
823 case TargetOpcode::G_MEMSET:
824 RTLibcall = RTLIB::MEMSET;
825 Args[0].Flags[0].setReturned();
834 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
837 if (RTLibcallImpl == RTLIB::Unsupported) {
844 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
851 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
858 if (Info.LoweredTailCall) {
859 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
869 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
870 "Expected instr following MI to be return or debug inst?");
873 Next->eraseFromParent();
874 }
while (
MI.getNextNode());
884 unsigned Opc =
MI.getOpcode();
886 auto &MMO = AtomicMI.getMMO();
887 auto Ordering = MMO.getMergedOrdering();
888 LLT MemType = MMO.getMemoryType();
891 return RTLIB::UNKNOWN_LIBCALL;
893#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
895 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
897 case TargetOpcode::G_ATOMIC_CMPXCHG:
898 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
899 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
900 return getOutlineAtomicHelper(LC, Ordering, MemSize);
902 case TargetOpcode::G_ATOMICRMW_XCHG: {
903 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
904 return getOutlineAtomicHelper(LC, Ordering, MemSize);
906 case TargetOpcode::G_ATOMICRMW_ADD:
907 case TargetOpcode::G_ATOMICRMW_SUB: {
908 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
909 return getOutlineAtomicHelper(LC, Ordering, MemSize);
911 case TargetOpcode::G_ATOMICRMW_AND: {
912 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
913 return getOutlineAtomicHelper(LC, Ordering, MemSize);
915 case TargetOpcode::G_ATOMICRMW_OR: {
916 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
917 return getOutlineAtomicHelper(LC, Ordering, MemSize);
919 case TargetOpcode::G_ATOMICRMW_XOR: {
920 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
921 return getOutlineAtomicHelper(LC, Ordering, MemSize);
924 return RTLIB::UNKNOWN_LIBCALL;
937 unsigned Opc =
MI.getOpcode();
939 case TargetOpcode::G_ATOMIC_CMPXCHG:
940 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
943 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
944 MI.getFirst4RegLLTs();
947 if (
Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
948 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
949 NewLLT) =
MI.getFirst5RegLLTs();
959 case TargetOpcode::G_ATOMICRMW_XCHG:
960 case TargetOpcode::G_ATOMICRMW_ADD:
961 case TargetOpcode::G_ATOMICRMW_SUB:
962 case TargetOpcode::G_ATOMICRMW_AND:
963 case TargetOpcode::G_ATOMICRMW_OR:
964 case TargetOpcode::G_ATOMICRMW_XOR: {
965 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
968 if (
Opc == TargetOpcode::G_ATOMICRMW_AND)
972 else if (
Opc == TargetOpcode::G_ATOMICRMW_SUB)
987 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
989 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
992 if (RTLibcallImpl == RTLIB::Unsupported) {
999 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1013static RTLIB::Libcall
1015 RTLIB::Libcall RTLibcall;
1016 switch (
MI.getOpcode()) {
1017 case TargetOpcode::G_GET_FPENV:
1018 RTLibcall = RTLIB::FEGETENV;
1020 case TargetOpcode::G_SET_FPENV:
1021 case TargetOpcode::G_RESET_FPENV:
1022 RTLibcall = RTLIB::FESETENV;
1024 case TargetOpcode::G_GET_FPMODE:
1025 RTLibcall = RTLIB::FEGETMODE;
1027 case TargetOpcode::G_SET_FPMODE:
1028 case TargetOpcode::G_RESET_FPMODE:
1029 RTLibcall = RTLIB::FESETMODE;
1061 LLT StateTy = MRI.getType(Dst);
1064 MachinePointerInfo TempPtrInfo;
1068 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1073 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1081 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1099 LLT StateTy = MRI.getType(Src);
1102 MachinePointerInfo TempPtrInfo;
1111 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1116 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1117 LocObserver,
nullptr);
1123static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1125#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1129 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1131 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1133 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1135 llvm_unreachable("unexpected size"); \
1166 LLT OpLLT = MRI.getType(
Cmp->getLHSReg());
1169 OpLLT != MRI.getType(
Cmp->getRHSReg()))
1176 LLT DstTy = MRI.getType(DstReg);
1177 const auto Cond =
Cmp->getCond();
1182 const auto BuildLibcall = [&](
const RTLIB::Libcall
Libcall,
1187 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1191 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1198 .buildICmp(ICmpPred, Res, Temp,
MIRBuilder.buildConstant(TempLLT, 0))
1204 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1206 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1219 const auto [OeqLibcall, OeqPred] =
1221 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1223 const auto [UnoLibcall, UnoPred] =
1225 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1240 const auto [OeqLibcall, OeqPred] =
1245 const auto [UnoLibcall, UnoPred] =
1250 if (NotOeq && NotUno)
1269 const auto [InversedLibcall, InversedPred] =
1271 if (!BuildLibcall(InversedLibcall,
1296 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1298 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1301 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1307 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &
MI);
1312 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
1314 switch (
MI.getOpcode()) {
1317 case TargetOpcode::G_MUL:
1318 case TargetOpcode::G_SDIV:
1319 case TargetOpcode::G_UDIV:
1320 case TargetOpcode::G_SREM:
1321 case TargetOpcode::G_UREM:
1322 case TargetOpcode::G_CTLZ_ZERO_POISON: {
1323 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1331 case TargetOpcode::G_FADD:
1332 case TargetOpcode::G_FSUB:
1333 case TargetOpcode::G_FMUL:
1334 case TargetOpcode::G_FDIV:
1335 case TargetOpcode::G_FMA:
1336 case TargetOpcode::G_FPOW:
1337 case TargetOpcode::G_FREM:
1338 case TargetOpcode::G_FCOS:
1339 case TargetOpcode::G_FSIN:
1340 case TargetOpcode::G_FTAN:
1341 case TargetOpcode::G_FACOS:
1342 case TargetOpcode::G_FASIN:
1343 case TargetOpcode::G_FATAN:
1344 case TargetOpcode::G_FATAN2:
1345 case TargetOpcode::G_FCOSH:
1346 case TargetOpcode::G_FSINH:
1347 case TargetOpcode::G_FTANH:
1348 case TargetOpcode::G_FLOG10:
1349 case TargetOpcode::G_FLOG:
1350 case TargetOpcode::G_FLOG2:
1351 case TargetOpcode::G_FEXP:
1352 case TargetOpcode::G_FEXP2:
1353 case TargetOpcode::G_FEXP10:
1354 case TargetOpcode::G_FCEIL:
1355 case TargetOpcode::G_FFLOOR:
1356 case TargetOpcode::G_FMINNUM:
1357 case TargetOpcode::G_FMAXNUM:
1358 case TargetOpcode::G_FMINIMUMNUM:
1359 case TargetOpcode::G_FMAXIMUMNUM:
1360 case TargetOpcode::G_FSQRT:
1361 case TargetOpcode::G_FRINT:
1362 case TargetOpcode::G_FNEARBYINT:
1363 case TargetOpcode::G_INTRINSIC_TRUNC:
1364 case TargetOpcode::G_INTRINSIC_ROUND:
1365 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1366 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1370 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1378 case TargetOpcode::G_FSINCOS: {
1379 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1383 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1388 case TargetOpcode::G_FMODF: {
1389 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1393 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1398 case TargetOpcode::G_LROUND:
1399 case TargetOpcode::G_LLROUND:
1400 case TargetOpcode::G_INTRINSIC_LRINT:
1401 case TargetOpcode::G_INTRINSIC_LLRINT: {
1402 LLT LLTy = MRI.getType(
MI.getOperand(1).getReg());
1406 Ctx, MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits());
1408 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1414 {{
MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &
MI);
1417 MI.eraseFromParent();
1420 case TargetOpcode::G_FPOWI:
1421 case TargetOpcode::G_FLDEXP: {
1422 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1426 Ctx, MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
1428 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1433 {
MI.getOperand(1).getReg(), HLTy, 0},
1434 {
MI.getOperand(2).getReg(), ITy, 1}};
1435 Args[1].Flags[0].setSExt();
1437 Libcall, {
MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &
MI);
1442 case TargetOpcode::G_FPEXT:
1443 case TargetOpcode::G_FPTRUNC: {
1446 if (!FromTy || !ToTy)
1453 case TargetOpcode::G_FCMP: {
1457 MI.eraseFromParent();
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1465 unsigned ToSize = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1469 FromTy, LocObserver);
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1481 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1488 case TargetOpcode::G_ATOMICRMW_XCHG:
1489 case TargetOpcode::G_ATOMICRMW_ADD:
1490 case TargetOpcode::G_ATOMICRMW_SUB:
1491 case TargetOpcode::G_ATOMICRMW_AND:
1492 case TargetOpcode::G_ATOMICRMW_OR:
1493 case TargetOpcode::G_ATOMICRMW_XOR:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1501 case TargetOpcode::G_BZERO:
1502 case TargetOpcode::G_MEMCPY:
1503 case TargetOpcode::G_MEMMOVE:
1504 case TargetOpcode::G_MEMSET: {
1509 MI.eraseFromParent();
1512 case TargetOpcode::G_GET_FPENV:
1513 case TargetOpcode::G_GET_FPMODE: {
1519 case TargetOpcode::G_SET_FPENV:
1520 case TargetOpcode::G_SET_FPMODE: {
1526 case TargetOpcode::G_RESET_FPENV:
1527 case TargetOpcode::G_RESET_FPMODE: {
1535 MI.eraseFromParent();
1542 uint64_t SizeOp0 = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1545 switch (
MI.getOpcode()) {
1548 case TargetOpcode::G_IMPLICIT_DEF: {
1550 LLT DstTy = MRI.getType(DstReg);
1558 if (SizeOp0 % NarrowSize != 0) {
1563 MI.eraseFromParent();
1567 int NumParts = SizeOp0 / NarrowSize;
1570 for (
int i = 0; i < NumParts; ++i)
1574 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1576 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1577 MI.eraseFromParent();
1580 case TargetOpcode::G_CONSTANT: {
1581 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1582 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1583 unsigned TotalSize = Ty.getSizeInBits();
1585 int NumParts = TotalSize / NarrowSize;
1588 for (
int I = 0;
I != NumParts; ++
I) {
1589 unsigned Offset =
I * NarrowSize;
1596 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1598 if (LeftoverBits != 0) {
1602 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1606 insertParts(
MI.getOperand(0).getReg(),
1607 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1609 MI.eraseFromParent();
1612 case TargetOpcode::G_SEXT:
1613 case TargetOpcode::G_ZEXT:
1614 case TargetOpcode::G_ANYEXT:
1616 case TargetOpcode::G_TRUNC: {
1620 uint64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1622 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1626 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
1627 MIRBuilder.buildCopy(
MI.getOperand(0), Unmerge.getReg(0));
1628 MI.eraseFromParent();
1631 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1632 case TargetOpcode::G_FREEZE: {
1636 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1641 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1).getReg());
1643 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1645 MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1649 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), Parts);
1650 MI.eraseFromParent();
1653 case TargetOpcode::G_ADD:
1654 case TargetOpcode::G_SUB:
1655 case TargetOpcode::G_SADDO:
1656 case TargetOpcode::G_SSUBO:
1657 case TargetOpcode::G_SADDE:
1658 case TargetOpcode::G_SSUBE:
1659 case TargetOpcode::G_UADDO:
1660 case TargetOpcode::G_USUBO:
1661 case TargetOpcode::G_UADDE:
1662 case TargetOpcode::G_USUBE:
1664 case TargetOpcode::G_MUL:
1665 case TargetOpcode::G_UMULH:
1667 case TargetOpcode::G_EXTRACT:
1669 case TargetOpcode::G_INSERT:
1671 case TargetOpcode::G_LOAD: {
1673 Register DstReg = LoadMI.getDstReg();
1674 LLT DstTy = MRI.getType(DstReg);
1678 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1679 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1680 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1682 LoadMI.eraseFromParent();
1688 case TargetOpcode::G_ZEXTLOAD:
1689 case TargetOpcode::G_SEXTLOAD:
1690 case TargetOpcode::G_FPEXTLOAD: {
1692 Register DstReg = LoadMI.getDstReg();
1693 Register PtrReg = LoadMI.getPointerReg();
1695 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1696 auto &MMO = LoadMI.getMMO();
1699 if (MemSize == NarrowSize) {
1701 }
else if (MemSize < NarrowSize) {
1702 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1703 }
else if (MemSize > NarrowSize) {
1715 LoadMI.eraseFromParent();
1718 case TargetOpcode::G_STORE: {
1721 Register SrcReg = StoreMI.getValueReg();
1722 LLT SrcTy = MRI.getType(SrcReg);
1723 if (SrcTy.isVector())
1726 int NumParts = SizeOp0 / NarrowSize;
1728 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1729 if (SrcTy.isVector() && LeftoverBits != 0)
1732 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1733 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1735 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1736 StoreMI.eraseFromParent();
1742 case TargetOpcode::G_FPTRUNCSTORE: {
1744 Register SrcReg = StoreMI.getValueReg();
1745 Register PtrReg = StoreMI.getPointerReg();
1747 auto &MMO = StoreMI.getMMO();
1749 if (MemSize > NarrowSize) {
1753 auto TmpReg =
MIRBuilder.buildFPTrunc(NarrowTy, SrcReg);
1754 if (MemSize == NarrowSize) {
1756 }
else if (MemSize < NarrowSize) {
1757 MIRBuilder.buildStoreInstr(TargetOpcode::G_FPTRUNCSTORE, TmpReg, PtrReg,
1761 StoreMI.eraseFromParent();
1764 case TargetOpcode::G_SELECT:
1766 case TargetOpcode::G_AND:
1767 case TargetOpcode::G_OR:
1768 case TargetOpcode::G_XOR: {
1780 case TargetOpcode::G_SHL:
1781 case TargetOpcode::G_LSHR:
1782 case TargetOpcode::G_ASHR:
1784 case TargetOpcode::G_CTLZ:
1785 case TargetOpcode::G_CTLZ_ZERO_POISON:
1786 case TargetOpcode::G_CTTZ:
1787 case TargetOpcode::G_CTTZ_ZERO_POISON:
1788 case TargetOpcode::G_CTLS:
1789 case TargetOpcode::G_CTPOP:
1791 switch (
MI.getOpcode()) {
1792 case TargetOpcode::G_CTLZ:
1793 case TargetOpcode::G_CTLZ_ZERO_POISON:
1795 case TargetOpcode::G_CTTZ:
1796 case TargetOpcode::G_CTTZ_ZERO_POISON:
1798 case TargetOpcode::G_CTPOP:
1800 case TargetOpcode::G_CTLS:
1810 case TargetOpcode::G_INTTOPTR:
1818 case TargetOpcode::G_PTRTOINT:
1826 case TargetOpcode::G_PHI: {
1829 if (SizeOp0 % NarrowSize != 0)
1832 unsigned NumParts = SizeOp0 / NarrowSize;
1836 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1844 for (
unsigned i = 0; i < NumParts; ++i) {
1845 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1847 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1848 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1849 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1852 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
1854 MI.eraseFromParent();
1857 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1858 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1862 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1868 case TargetOpcode::G_ICMP: {
1870 LLT SrcTy = MRI.getType(LHS);
1876 if (!
extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1882 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1883 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1889 LLT ResTy = MRI.getType(Dst);
1894 auto Zero =
MIRBuilder.buildConstant(NarrowTy, 0);
1896 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1897 auto LHS = std::get<0>(LHSAndRHS);
1898 auto RHS = std::get<1>(LHSAndRHS);
1899 auto Xor =
MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1906 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1907 auto LHS = std::get<0>(LHSAndRHS);
1908 auto RHS = std::get<1>(LHSAndRHS);
1909 auto Xor =
MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1910 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1911 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1912 TargetOpcode::G_ZEXT);
1919 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1920 auto Or =
MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1921 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1926 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1930 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[
I],
1942 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[
I],
1945 LHSPartRegs[
I], RHSPartRegs[
I]);
1946 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1952 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1961 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1965 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[
I],
1966 RHSLeftoverRegs[
I]);
1968 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[
I],
1969 RHSLeftoverRegs[
I]);
1972 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1973 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1979 MI.eraseFromParent();
1982 case TargetOpcode::G_FCMP:
1991 case TargetOpcode::G_SEXT_INREG: {
1995 int64_t SizeInBits =
MI.getOperand(2).getImm();
2004 auto TruncMIB =
MIRBuilder.buildTrunc(NarrowTy, MO1);
2005 MO1.
setReg(TruncMIB.getReg(0));
2008 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
2020 if (SizeOp0 % NarrowSize != 0)
2022 int NumParts = SizeOp0 / NarrowSize;
2030 for (
int i = 0; i < NumParts; ++i) {
2031 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2046 for (
int i = 0; i < NumParts; ++i) {
2049 PartialExtensionReg = DstRegs.
back();
2051 assert(PartialExtensionReg &&
2052 "Expected to visit partial extension before full");
2053 if (FullExtensionReg) {
2058 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2060 FullExtensionReg = DstRegs.
back();
2065 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2068 PartialExtensionReg = DstRegs.
back();
2074 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2075 MI.eraseFromParent();
2078 case TargetOpcode::G_BSWAP:
2079 case TargetOpcode::G_BITREVERSE: {
2080 if (SizeOp0 % NarrowSize != 0)
2085 unsigned NumParts = SizeOp0 / NarrowSize;
2086 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2089 for (
unsigned i = 0; i < NumParts; ++i) {
2090 auto DstPart =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
2091 {SrcRegs[NumParts - 1 - i]});
2095 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
2098 MI.eraseFromParent();
2101 case TargetOpcode::G_PTR_ADD:
2102 case TargetOpcode::G_PTRMASK: {
2110 case TargetOpcode::G_FPTOUI:
2111 case TargetOpcode::G_FPTOSI:
2112 case TargetOpcode::G_FPTOUI_SAT:
2113 case TargetOpcode::G_FPTOSI_SAT:
2115 case TargetOpcode::G_FPEXT:
2122 case TargetOpcode::G_FLDEXP:
2123 case TargetOpcode::G_STRICT_FLDEXP:
2125 case TargetOpcode::G_VSCALE: {
2127 LLT Ty = MRI.getType(Dst);
2131 auto VScaleBase =
MIRBuilder.buildVScale(NarrowTy, One);
2132 auto ZExt =
MIRBuilder.buildZExt(Ty, VScaleBase);
2133 auto C =
MIRBuilder.buildConstant(Ty, *
MI.getOperand(1).getCImm());
2136 MI.eraseFromParent();
2143 LLT Ty = MRI.getType(Val);
2149 if (Ty.isPointer()) {
2150 if (
DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2152 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2158 if (Ty.isPointerVector())
2159 NewVal =
MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2160 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2164 unsigned OpIdx,
unsigned ExtOpcode) {
2166 auto ExtB =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2167 MO.
setReg(ExtB.getReg(0));
2173 auto ExtB =
MIRBuilder.buildTrunc(NarrowTy, MO);
2174 MO.
setReg(ExtB.getReg(0));
2178 unsigned OpIdx,
unsigned TruncOpcode) {
2180 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2182 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2187 unsigned OpIdx,
unsigned ExtOpcode) {
2189 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2191 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2200 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2202 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2208 MO.
setReg(
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2218 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2225LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2230 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
2231 if (DstTy.isVector())
2236 const int SrcSize = SrcTy.getSizeInBits();
2238 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2240 unsigned NumOps =
MI.getNumOperands();
2241 unsigned NumSrc =
MI.getNumOperands() - 1;
2242 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2244 if (WideSize >= DstSize) {
2248 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
2249 const unsigned Offset = (
I - 1) * PartSize;
2262 ResultReg = NextResult;
2265 if (WideSize > DstSize)
2267 else if (DstTy.isPointer())
2270 MI.eraseFromParent();
2295 const int GCD = std::gcd(SrcSize, WideSize);
2305 if (GCD == SrcSize) {
2308 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2309 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2315 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2317 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
2321 const int PartsPerGCD = WideSize / GCD;
2325 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2327 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2334 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2336 auto FinalMerge =
MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2337 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2340 MI.eraseFromParent();
2345LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2350 int NumDst =
MI.getNumOperands() - 1;
2351 Register SrcReg =
MI.getOperand(NumDst).getReg();
2352 LLT SrcTy = MRI.getType(SrcReg);
2356 Register Dst0Reg =
MI.getOperand(0).getReg();
2357 LLT DstTy = MRI.getType(Dst0Reg);
2366 dbgs() <<
"Not casting non-integral address space integer\n");
2371 SrcReg =
MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2379 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2387 for (
int I = 1;
I != NumDst; ++
I) {
2388 auto ShiftAmt =
MIRBuilder.buildConstant(SrcTy, DstSize *
I);
2389 auto Shr =
MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2393 MI.eraseFromParent();
2404 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2408 WideSrc =
MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2411 auto Unmerge =
MIRBuilder.buildUnmerge(WideTy, WideSrc);
2429 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2434 if (PartsPerRemerge == 1) {
2437 for (
int I = 0;
I != NumUnmerge; ++
I) {
2438 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2440 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2441 int Idx =
I * PartsPerUnmerge + J;
2443 MIB.addDef(
MI.getOperand(Idx).getReg());
2446 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2450 MIB.addUse(Unmerge.getReg(
I));
2453 SmallVector<Register, 16> Parts;
2454 for (
int J = 0; J != NumUnmerge; ++J)
2455 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2458 for (
int I = 0;
I != NumDst; ++
I) {
2459 for (
int J = 0; J < PartsPerRemerge; ++J) {
2460 const int Idx =
I * PartsPerRemerge + J;
2464 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(
I).getReg(), RemergeParts);
2465 RemergeParts.
clear();
2469 MI.eraseFromParent();
2474LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2476 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2477 unsigned Offset =
MI.getOperand(2).getImm();
2480 if (SrcTy.
isVector() || DstTy.isVector())
2492 Src =
MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2496 if (DstTy.isPointer())
2503 MI.eraseFromParent();
2508 LLT ShiftTy = SrcTy;
2517 MI.eraseFromParent();
2548LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2550 if (TypeIdx != 0 || WideTy.
isVector())
2560LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2564 std::optional<Register> CarryIn;
2565 switch (
MI.getOpcode()) {
2568 case TargetOpcode::G_SADDO:
2569 Opcode = TargetOpcode::G_ADD;
2570 ExtOpcode = TargetOpcode::G_SEXT;
2572 case TargetOpcode::G_SSUBO:
2573 Opcode = TargetOpcode::G_SUB;
2574 ExtOpcode = TargetOpcode::G_SEXT;
2576 case TargetOpcode::G_UADDO:
2577 Opcode = TargetOpcode::G_ADD;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2580 case TargetOpcode::G_USUBO:
2581 Opcode = TargetOpcode::G_SUB;
2582 ExtOpcode = TargetOpcode::G_ZEXT;
2584 case TargetOpcode::G_SADDE:
2585 Opcode = TargetOpcode::G_UADDE;
2586 ExtOpcode = TargetOpcode::G_SEXT;
2587 CarryIn =
MI.getOperand(4).getReg();
2589 case TargetOpcode::G_SSUBE:
2590 Opcode = TargetOpcode::G_USUBE;
2591 ExtOpcode = TargetOpcode::G_SEXT;
2592 CarryIn =
MI.getOperand(4).getReg();
2594 case TargetOpcode::G_UADDE:
2595 Opcode = TargetOpcode::G_UADDE;
2596 ExtOpcode = TargetOpcode::G_ZEXT;
2597 CarryIn =
MI.getOperand(4).getReg();
2599 case TargetOpcode::G_USUBE:
2600 Opcode = TargetOpcode::G_USUBE;
2601 ExtOpcode = TargetOpcode::G_ZEXT;
2602 CarryIn =
MI.getOperand(4).getReg();
2618 auto LHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(2)});
2619 auto RHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(3)});
2623 LLT CarryOutTy = MRI.getType(
MI.getOperand(1).getReg());
2625 .buildInstr(Opcode, {WideTy, CarryOutTy},
2626 {LHSExt, RHSExt, *CarryIn})
2629 NewOp =
MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).
getReg(0);
2631 LLT OrigTy = MRI.getType(
MI.getOperand(0).getReg());
2632 auto TruncOp =
MIRBuilder.buildTrunc(OrigTy, NewOp);
2633 auto ExtOp =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2638 MI.eraseFromParent();
2643LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2645 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2646 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2647 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2648 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2649 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2662 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2669 auto ShiftK =
MIRBuilder.buildConstant(WideTy, SHLAmount);
2673 auto WideInst =
MIRBuilder.buildInstr(
MI.getOpcode(), {WideTy},
2674 {ShiftL, ShiftR},
MI.getFlags());
2679 :
MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2682 MI.eraseFromParent();
2687LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2696 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2698 LLT SrcTy = MRI.getType(
LHS);
2699 LLT OverflowTy = MRI.getType(OriginalOverflow);
2706 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2707 auto LeftOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
LHS});
2708 auto RightOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
RHS});
2715 WideMulCanOverflow ?
MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2717 MachineInstrBuilder Mulo;
2718 if (WideMulCanOverflow)
2719 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2720 {LeftOperand, RightOperand});
2722 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2727 MachineInstrBuilder ExtResult;
2734 ExtResult =
MIRBuilder.buildSExtInReg(WideTy,
Mul, SrcBitWidth);
2738 ExtResult =
MIRBuilder.buildZExtInReg(WideTy,
Mul, SrcBitWidth);
2741 if (WideMulCanOverflow) {
2749 MI.eraseFromParent();
2755 unsigned Opcode =
MI.getOpcode();
2759 case TargetOpcode::G_ATOMICRMW_XCHG:
2760 case TargetOpcode::G_ATOMICRMW_ADD:
2761 case TargetOpcode::G_ATOMICRMW_SUB:
2762 case TargetOpcode::G_ATOMICRMW_AND:
2763 case TargetOpcode::G_ATOMICRMW_OR:
2764 case TargetOpcode::G_ATOMICRMW_XOR:
2765 case TargetOpcode::G_ATOMICRMW_MIN:
2766 case TargetOpcode::G_ATOMICRMW_MAX:
2767 case TargetOpcode::G_ATOMICRMW_UMIN:
2768 case TargetOpcode::G_ATOMICRMW_UMAX:
2769 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2775 case TargetOpcode::G_ATOMIC_CMPXCHG:
2776 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2783 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2793 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2798 case TargetOpcode::G_EXTRACT:
2799 return widenScalarExtract(
MI, TypeIdx, WideTy);
2800 case TargetOpcode::G_INSERT:
2801 return widenScalarInsert(
MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_MERGE_VALUES:
2803 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2804 case TargetOpcode::G_UNMERGE_VALUES:
2805 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2806 case TargetOpcode::G_SADDO:
2807 case TargetOpcode::G_SSUBO:
2808 case TargetOpcode::G_UADDO:
2809 case TargetOpcode::G_USUBO:
2810 case TargetOpcode::G_SADDE:
2811 case TargetOpcode::G_SSUBE:
2812 case TargetOpcode::G_UADDE:
2813 case TargetOpcode::G_USUBE:
2814 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2815 case TargetOpcode::G_UMULO:
2816 case TargetOpcode::G_SMULO:
2817 return widenScalarMulo(
MI, TypeIdx, WideTy);
2818 case TargetOpcode::G_SADDSAT:
2819 case TargetOpcode::G_SSUBSAT:
2820 case TargetOpcode::G_SSHLSAT:
2821 case TargetOpcode::G_UADDSAT:
2822 case TargetOpcode::G_USUBSAT:
2823 case TargetOpcode::G_USHLSAT:
2824 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2825 case TargetOpcode::G_CTTZ:
2826 case TargetOpcode::G_CTTZ_ZERO_POISON:
2827 case TargetOpcode::G_CTLZ:
2828 case TargetOpcode::G_CTLZ_ZERO_POISON:
2829 case TargetOpcode::G_CTLS:
2830 case TargetOpcode::G_CTPOP: {
2843 case TargetOpcode::G_CTTZ:
2844 case TargetOpcode::G_CTTZ_ZERO_POISON:
2845 case TargetOpcode::G_CTLZ_ZERO_POISON:
2846 ExtOpc = TargetOpcode::G_ANYEXT;
2848 case TargetOpcode::G_CTLS:
2849 ExtOpc = TargetOpcode::G_SEXT;
2852 ExtOpc = TargetOpcode::G_ZEXT;
2855 auto MIBSrc =
MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2856 LLT CurTy = MRI.getType(SrcReg);
2857 unsigned NewOpc = Opcode;
2858 if (NewOpc == TargetOpcode::G_CTTZ) {
2865 WideTy, MIBSrc,
MIRBuilder.buildConstant(WideTy, TopBit));
2867 NewOpc = TargetOpcode::G_CTTZ_ZERO_POISON;
2873 if (Opcode == TargetOpcode::G_CTLZ_ZERO_POISON) {
2883 auto MIBNewOp =
MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2885 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2890 WideTy, MIBNewOp,
MIRBuilder.buildConstant(WideTy, SizeDiff),
2891 Opcode == TargetOpcode::G_CTLZ
2896 MIRBuilder.buildZExtOrTrunc(
MI.getOperand(0), MIBNewOp);
2897 MI.eraseFromParent();
2900 case TargetOpcode::G_BSWAP: {
2904 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2905 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2906 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2909 MI.getOperand(0).setReg(DstExt);
2913 LLT Ty = MRI.getType(DstReg);
2915 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2916 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2922 case TargetOpcode::G_BITREVERSE: {
2926 LLT Ty = MRI.getType(DstReg);
2929 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2931 MI.getOperand(0).setReg(DstExt);
2934 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, DiffBits);
2935 auto Shift =
MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2940 case TargetOpcode::G_FREEZE:
2941 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2948 case TargetOpcode::G_ABS:
2955 case TargetOpcode::G_ADD:
2956 case TargetOpcode::G_AND:
2957 case TargetOpcode::G_MUL:
2958 case TargetOpcode::G_OR:
2959 case TargetOpcode::G_XOR:
2960 case TargetOpcode::G_SUB:
2961 case TargetOpcode::G_SHUFFLE_VECTOR:
2972 case TargetOpcode::G_SBFX:
2973 case TargetOpcode::G_UBFX:
2987 case TargetOpcode::G_SHL:
3003 case TargetOpcode::G_ROTR:
3004 case TargetOpcode::G_ROTL:
3013 case TargetOpcode::G_SDIV:
3014 case TargetOpcode::G_SREM:
3015 case TargetOpcode::G_SMIN:
3016 case TargetOpcode::G_SMAX:
3017 case TargetOpcode::G_ABDS:
3025 case TargetOpcode::G_SDIVREM:
3035 case TargetOpcode::G_ASHR:
3036 case TargetOpcode::G_LSHR:
3040 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3054 case TargetOpcode::G_UDIV:
3055 case TargetOpcode::G_UREM:
3056 case TargetOpcode::G_ABDU:
3063 case TargetOpcode::G_UDIVREM:
3072 case TargetOpcode::G_UMIN:
3073 case TargetOpcode::G_UMAX: {
3074 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3076 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3080 ? TargetOpcode::G_SEXT
3081 : TargetOpcode::G_ZEXT;
3091 case TargetOpcode::G_SELECT:
3101 bool IsVec = MRI.getType(
MI.getOperand(1).getReg()).isVector();
3108 case TargetOpcode::G_FPEXT:
3116 case TargetOpcode::G_FPTOSI:
3117 case TargetOpcode::G_FPTOUI:
3118 case TargetOpcode::G_INTRINSIC_LRINT:
3119 case TargetOpcode::G_INTRINSIC_LLRINT:
3120 case TargetOpcode::G_IS_FPCLASS:
3130 case TargetOpcode::G_SITOFP:
3140 case TargetOpcode::G_UITOFP:
3150 case TargetOpcode::G_FPTOSI_SAT:
3151 case TargetOpcode::G_FPTOUI_SAT:
3156 LLT Ty = MRI.getType(OldDst);
3157 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3159 MI.getOperand(0).setReg(ExtReg);
3160 uint64_t ShortBits = Ty.getScalarSizeInBits();
3163 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3174 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3175 NewDst =
MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3183 NewDst =
MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3191 case TargetOpcode::G_LOAD:
3192 case TargetOpcode::G_SEXTLOAD:
3193 case TargetOpcode::G_ZEXTLOAD:
3194 case TargetOpcode::G_FPEXTLOAD:
3200 case TargetOpcode::G_STORE: {
3204 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3205 assert(!Ty.isPointerOrPointerVector() &&
"Can't widen type");
3206 if (!Ty.isScalar()) {
3214 MI.setMemRefs(MF, {NewMMO});
3221 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3222 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3228 case TargetOpcode::G_FPTRUNCSTORE:
3235 case TargetOpcode::G_CONSTANT: {
3238 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3239 MRI.getType(
MI.getOperand(0).getReg()));
3240 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3241 ExtOpc == TargetOpcode::G_ANYEXT) &&
3244 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3248 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3254 case TargetOpcode::G_FCONSTANT: {
3260 auto IntCst =
MIRBuilder.buildConstant(
MI.getOperand(0).getReg(), Val);
3262 MI.eraseFromParent();
3265 case TargetOpcode::G_IMPLICIT_DEF: {
3271 case TargetOpcode::G_BRCOND:
3277 case TargetOpcode::G_FCMP:
3288 case TargetOpcode::G_ICMP:
3293 LLT SrcTy = MRI.getType(
MI.getOperand(2).getReg());
3297 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3298 unsigned ExtOpcode =
3302 ? TargetOpcode::G_SEXT
3303 : TargetOpcode::G_ZEXT;
3310 case TargetOpcode::G_PTR_ADD:
3311 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3317 case TargetOpcode::G_PHI: {
3318 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3321 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3333 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3336 LLT VecTy = MRI.getType(VecReg);
3340 TargetOpcode::G_ANYEXT);
3354 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3370 LLT VecTy = MRI.getType(VecReg);
3389 case TargetOpcode::G_FADD:
3390 case TargetOpcode::G_FMUL:
3391 case TargetOpcode::G_FSUB:
3392 case TargetOpcode::G_FMA:
3393 case TargetOpcode::G_FMAD:
3394 case TargetOpcode::G_FNEG:
3395 case TargetOpcode::G_FABS:
3396 case TargetOpcode::G_FCANONICALIZE:
3397 case TargetOpcode::G_FMINNUM:
3398 case TargetOpcode::G_FMAXNUM:
3399 case TargetOpcode::G_FMINNUM_IEEE:
3400 case TargetOpcode::G_FMAXNUM_IEEE:
3401 case TargetOpcode::G_FMINIMUM:
3402 case TargetOpcode::G_FMAXIMUM:
3403 case TargetOpcode::G_FMINIMUMNUM:
3404 case TargetOpcode::G_FMAXIMUMNUM:
3405 case TargetOpcode::G_FDIV:
3406 case TargetOpcode::G_FREM:
3407 case TargetOpcode::G_FCEIL:
3408 case TargetOpcode::G_FFLOOR:
3409 case TargetOpcode::G_FCOS:
3410 case TargetOpcode::G_FSIN:
3411 case TargetOpcode::G_FTAN:
3412 case TargetOpcode::G_FACOS:
3413 case TargetOpcode::G_FASIN:
3414 case TargetOpcode::G_FATAN:
3415 case TargetOpcode::G_FATAN2:
3416 case TargetOpcode::G_FCOSH:
3417 case TargetOpcode::G_FSINH:
3418 case TargetOpcode::G_FTANH:
3419 case TargetOpcode::G_FLOG10:
3420 case TargetOpcode::G_FLOG:
3421 case TargetOpcode::G_FLOG2:
3422 case TargetOpcode::G_FRINT:
3423 case TargetOpcode::G_FNEARBYINT:
3424 case TargetOpcode::G_FSQRT:
3425 case TargetOpcode::G_FEXP:
3426 case TargetOpcode::G_FEXP2:
3427 case TargetOpcode::G_FEXP10:
3428 case TargetOpcode::G_FPOW:
3429 case TargetOpcode::G_INTRINSIC_TRUNC:
3430 case TargetOpcode::G_INTRINSIC_ROUND:
3431 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3435 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3441 case TargetOpcode::G_FMODF: {
3451 case TargetOpcode::G_FPOWI:
3452 case TargetOpcode::G_FLDEXP:
3453 case TargetOpcode::G_STRICT_FLDEXP: {
3455 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3476 case TargetOpcode::G_FFREXP: {
3489 case TargetOpcode::G_LROUND:
3490 case TargetOpcode::G_LLROUND:
3501 case TargetOpcode::G_INTTOPTR:
3509 case TargetOpcode::G_PTRTOINT:
3517 case TargetOpcode::G_BUILD_VECTOR: {
3521 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3527 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3535 case TargetOpcode::G_SEXT_INREG:
3544 case TargetOpcode::G_PTRMASK: {
3552 case TargetOpcode::G_VECREDUCE_ADD: {
3561 case TargetOpcode::G_VECREDUCE_FADD:
3562 case TargetOpcode::G_VECREDUCE_FMUL:
3563 case TargetOpcode::G_VECREDUCE_FMIN:
3564 case TargetOpcode::G_VECREDUCE_FMAX:
3565 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3566 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3571 LLT VecTy = MRI.getType(VecReg);
3578 case TargetOpcode::G_VSCALE: {
3585 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3590 case TargetOpcode::G_SPLAT_VECTOR: {
3599 case TargetOpcode::G_INSERT_SUBVECTOR: {
3607 LLT SubVecTy = MRI.getType(SubVec);
3611 auto BigZExt =
MIRBuilder.buildZExt(WideTy, BigVec);
3612 auto SubZExt =
MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3613 auto WideInsert =
MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3617 auto SplatZero =
MIRBuilder.buildSplatVector(
3622 MI.eraseFromParent();
3631 auto Unmerge =
B.buildUnmerge(Ty, Src);
3632 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
3641 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3655 MIRBuilder.
buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3664 MI.eraseFromParent();
3675 MI.eraseFromParent();
3682 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3683 if (SrcTy.isVector()) {
3687 if (DstTy.isVector()) {
3688 int NumDstElt = DstTy.getNumElements();
3689 int NumSrcElt = SrcTy.getNumElements();
3692 LLT DstCastTy = DstEltTy;
3693 LLT SrcPartTy = SrcEltTy;
3697 if (NumSrcElt < NumDstElt) {
3708 SrcPartTy = SrcEltTy;
3709 }
else if (NumSrcElt > NumDstElt) {
3721 DstCastTy = DstEltTy;
3726 SrcReg =
MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3730 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3731 MI.eraseFromParent();
3735 if (DstTy.isVector()) {
3738 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3739 MI.eraseFromParent();
3755 unsigned NewEltSize,
3756 unsigned OldEltSize) {
3757 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3758 LLT IdxTy =
B.getMRI()->getType(Idx);
3761 auto OffsetMask =
B.buildConstant(
3763 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
3764 return B.buildShl(IdxTy, OffsetIdx,
3765 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3780 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] =
MI.getFirst3RegLLTs();
3784 unsigned OldNumElts = SrcVecTy.getNumElements();
3791 if (NewNumElts > OldNumElts) {
3802 if (NewNumElts % OldNumElts != 0)
3806 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3810 auto NewEltsPerOldEltK =
MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3813 auto NewBaseIdx =
MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3815 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3816 auto IdxOffset =
MIRBuilder.buildConstant(IdxTy,
I);
3817 auto TmpIdx =
MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3818 auto Elt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3819 NewOps[
I] = Elt.getReg(0);
3822 auto NewVec =
MIRBuilder.buildBuildVector(MidTy, NewOps);
3824 MI.eraseFromParent();
3828 if (NewNumElts < OldNumElts) {
3829 if (NewEltSize % OldEltSize != 0)
3851 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3852 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3855 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3859 WideElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3860 ScaledIdx).getReg(0);
3868 auto ExtractedBits =
MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3870 MI.eraseFromParent();
3884 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3885 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3886 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3887 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3890 auto EltMask =
B.buildConstant(
3894 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3895 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3898 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3902 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3916 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3917 MI.getFirst4RegLLTs();
3929 if (NewNumElts < OldNumElts) {
3930 if (NewEltSize % OldEltSize != 0)
3939 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3940 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3943 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3947 ExtractedElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3948 ScaledIdx).getReg(0);
3958 InsertedElt =
MIRBuilder.buildInsertVectorElement(
3959 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3963 MI.eraseFromParent();
3993 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3997 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3998 return UnableToLegalize;
4003 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
4005 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
4014 MI.eraseFromParent();
4032 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
4033 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4043 auto Inp1 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4044 auto Inp2 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4046 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4047 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4049 MI.eraseFromParent();
4079 LLT DstTy = MRI.getType(Dst);
4080 LLT SrcTy = MRI.getType(Src);
4086 if (DstTy == CastTy)
4094 if (CastEltSize < DstEltSize)
4097 auto AdjustAmt = CastEltSize / DstEltSize;
4098 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4099 SrcTyMinElts % AdjustAmt != 0)
4104 auto CastVec =
MIRBuilder.buildBitcast(SrcTy, Src);
4105 auto PromotedES =
MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4108 ES->eraseFromParent();
4143 LLT DstTy = MRI.getType(Dst);
4144 LLT BigVecTy = MRI.getType(BigVec);
4145 LLT SubVecTy = MRI.getType(SubVec);
4147 if (DstTy == CastTy)
4162 if (CastEltSize < DstEltSize)
4165 auto AdjustAmt = CastEltSize / DstEltSize;
4166 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4167 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4173 auto CastBigVec =
MIRBuilder.buildBitcast(BigVecTy, BigVec);
4174 auto CastSubVec =
MIRBuilder.buildBitcast(SubVecTy, SubVec);
4176 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4179 ES->eraseFromParent();
4187 LLT DstTy = MRI.getType(DstReg);
4197 if (MemSizeInBits != MemStoreSizeInBits) {
4214 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4218 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4219 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4221 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4224 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4226 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4229 if (DstTy != LoadTy)
4237 if (
MIRBuilder.getDataLayout().isBigEndian())
4255 uint64_t LargeSplitSize, SmallSplitSize;
4260 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4267 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4270 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4281 if (Alignment.
value() * 8 > MemSizeInBits &&
4286 auto NewLoad =
MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4303 LLT PtrTy = MRI.getType(PtrReg);
4316 auto LargeLoad =
MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4319 auto OffsetCst =
MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
4320 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4321 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4323 SmallPtr, *SmallMMO);
4325 auto ShiftAmt =
MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4326 auto Shift =
MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4328 if (AnyExtTy == DstTy)
4329 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4331 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4335 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4355 LLT SrcTy = MRI.getType(SrcReg);
4363 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4369 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4371 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4375 auto ZextInReg =
MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4379 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4394 uint64_t LargeSplitSize, SmallSplitSize;
4401 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4404 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4413 if (SrcTy.isPointer()) {
4415 SrcReg =
MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4418 auto ExtVal =
MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4421 auto ShiftAmt =
MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4422 auto SmallVal =
MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4425 LLT PtrTy = MRI.getType(PtrReg);
4427 LargeSplitSize / 8);
4428 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4434 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4435 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4444 LLT SrcTy = MRI.getType(SrcReg);
4450 assert(SrcTy.isVector() &&
"Expect a vector store type");
4457 auto CurrVal =
MIRBuilder.buildConstant(IntTy, 0);
4461 auto Elt =
MIRBuilder.buildExtractVectorElement(
4462 SrcTy.getElementType(), SrcReg,
MIRBuilder.buildConstant(IdxTy,
I));
4463 auto Trunc =
MIRBuilder.buildTrunc(MemScalarTy, Elt);
4464 auto ZExt =
MIRBuilder.buildZExt(IntTy, Trunc);
4470 auto Shifted =
MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4471 CurrVal =
MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4475 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4486 switch (
MI.getOpcode()) {
4487 case TargetOpcode::G_LOAD: {
4505 case TargetOpcode::G_STORE: {
4521 case TargetOpcode::G_SELECT: {
4525 if (MRI.getType(
MI.getOperand(1).getReg()).isVector()) {
4527 dbgs() <<
"bitcast action not implemented for vector select\n");
4538 case TargetOpcode::G_AND:
4539 case TargetOpcode::G_OR:
4540 case TargetOpcode::G_XOR: {
4548 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4550 case TargetOpcode::G_INSERT_VECTOR_ELT:
4552 case TargetOpcode::G_CONCAT_VECTORS:
4554 case TargetOpcode::G_SHUFFLE_VECTOR:
4556 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4558 case TargetOpcode::G_INSERT_SUBVECTOR:
4566void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4576 switch(
MI.getOpcode()) {
4579 case TargetOpcode::G_FCONSTANT:
4581 case TargetOpcode::G_BITCAST:
4583 case TargetOpcode::G_SREM:
4584 case TargetOpcode::G_UREM: {
4585 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4587 MIRBuilder.buildInstr(
MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4588 {MI.getOperand(1), MI.getOperand(2)});
4590 auto Prod =
MIRBuilder.buildMul(Ty, Quot,
MI.getOperand(2));
4592 MI.eraseFromParent();
4595 case TargetOpcode::G_SADDO:
4596 case TargetOpcode::G_SSUBO:
4598 case TargetOpcode::G_SADDE:
4600 case TargetOpcode::G_SSUBE:
4602 case TargetOpcode::G_UMULH:
4603 case TargetOpcode::G_SMULH:
4605 case TargetOpcode::G_SMULO:
4606 case TargetOpcode::G_UMULO: {
4609 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4610 LLT Ty = MRI.getType(Res);
4612 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4613 ? TargetOpcode::G_SMULH
4614 : TargetOpcode::G_UMULH;
4618 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4619 MI.removeOperand(1);
4622 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4630 if (Opcode == TargetOpcode::G_SMULH) {
4631 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4632 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4639 case TargetOpcode::G_FNEG: {
4640 auto [Res, ResTy, SubByReg, SubByRegTy] =
MI.getFirst2RegLLTs();
4643 Register CastedSubByReg = SubByReg;
4645 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4646 !SubByRegTy.getScalarType().isInteger()) {
4647 auto BitcastDst = SubByRegTy.changeElementType(
4649 CastedSubByReg =
MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4655 if (ResTy != TyInt) {
4657 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4660 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4662 MI.eraseFromParent();
4665 case TargetOpcode::G_FSUB:
4666 case TargetOpcode::G_STRICT_FSUB: {
4667 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4668 LLT Ty = MRI.getType(Res);
4673 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4674 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4678 MI.eraseFromParent();
4681 case TargetOpcode::G_FMAD:
4683 case TargetOpcode::G_FFLOOR:
4685 case TargetOpcode::G_LROUND:
4686 case TargetOpcode::G_LLROUND: {
4689 LLT SrcTy = MRI.getType(SrcReg);
4690 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4693 MI.eraseFromParent();
4696 case TargetOpcode::G_INTRINSIC_ROUND:
4698 case TargetOpcode::G_FRINT: {
4701 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4704 case TargetOpcode::G_INTRINSIC_LRINT:
4705 case TargetOpcode::G_INTRINSIC_LLRINT: {
4708 LLT SrcTy = MRI.getType(SrcReg);
4710 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4712 MI.eraseFromParent();
4715 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4716 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4717 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4718 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4719 **
MI.memoperands_begin());
4721 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4722 MI.eraseFromParent();
4725 case TargetOpcode::G_LOAD:
4726 case TargetOpcode::G_SEXTLOAD:
4727 case TargetOpcode::G_ZEXTLOAD:
4729 case TargetOpcode::G_STORE:
4731 case TargetOpcode::G_CTLZ_ZERO_POISON:
4732 case TargetOpcode::G_CTTZ_ZERO_POISON:
4733 case TargetOpcode::G_CTLZ:
4734 case TargetOpcode::G_CTTZ:
4735 case TargetOpcode::G_CTPOP:
4736 case TargetOpcode::G_CTLS:
4739 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4741 Register NewRes = MRI.cloneVirtualRegister(Res);
4748 MI.eraseFromParent();
4752 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4753 const LLT CondTy = MRI.getType(CarryOut);
4754 const LLT Ty = MRI.getType(Res);
4756 Register NewRes = MRI.cloneVirtualRegister(Res);
4759 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4765 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4766 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4773 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4778 MI.eraseFromParent();
4782 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4787 MI.eraseFromParent();
4791 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4792 const LLT CondTy = MRI.getType(BorrowOut);
4793 const LLT Ty = MRI.getType(Res);
4796 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4802 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4803 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4810 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4811 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4813 MI.eraseFromParent();
4853 case G_MERGE_VALUES:
4855 case G_UNMERGE_VALUES:
4857 case TargetOpcode::G_SEXT_INREG: {
4858 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4859 int64_t SizeInBits =
MI.getOperand(2).getImm();
4861 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4862 LLT DstTy = MRI.getType(DstReg);
4863 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4866 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4867 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4868 MI.eraseFromParent();
4871 case G_EXTRACT_VECTOR_ELT:
4872 case G_INSERT_VECTOR_ELT:
4874 case G_SHUFFLE_VECTOR:
4876 case G_VECTOR_COMPRESS:
4878 case G_DYN_STACKALLOC:
4882 case G_STACKRESTORE:
4892 case G_READ_REGISTER:
4893 case G_WRITE_REGISTER:
4900 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4901 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4907 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4912 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4923 bool IsSigned =
MI.getOpcode() == G_ABDS;
4924 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4925 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4926 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4950 case G_MEMCPY_INLINE:
4951 return lowerMemcpyInline(
MI);
4962 case G_ATOMICRMW_SUB: {
4963 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
4964 const LLT ValTy = MRI.getType(Val);
4968 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4969 MI.eraseFromParent();
4995 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4999 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
5005 Align StackTypeAlign =
5012 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
5013 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
5018 LLT IdxTy =
B.getMRI()->getType(IdxReg);
5030 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
5033 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
5044 "Converting bits to bytes lost precision");
5050 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5051 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
5053 if (IdxTy != MRI.getType(Index))
5054 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5059 LLT PtrTy = MRI.getType(VecPtr);
5060 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
5068 std::initializer_list<unsigned> NonVecOpIndices) {
5069 if (
MI.getNumMemOperands() != 0)
5086 if (!Ty.isVector()) {
5092 if (Ty.getNumElements() != NumElts)
5107 assert(Ty.isVector() &&
"Expected vector type");
5109 int NumParts, NumLeftover;
5110 std::tie(NumParts, NumLeftover) =
5113 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5114 for (
int i = 0; i < NumParts; ++i) {
5119 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5128 for (
unsigned i = 0; i <
N; ++i) {
5130 Ops.push_back(
Op.getReg());
5131 else if (
Op.isImm())
5132 Ops.push_back(
Op.getImm());
5133 else if (
Op.isPredicate())
5155 std::initializer_list<unsigned> NonVecOpIndices) {
5157 "Non-compatible opcode or not specified non-vector operands");
5158 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5160 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5161 unsigned NumDefs =
MI.getNumDefs();
5169 for (
unsigned i = 0; i < NumDefs; ++i) {
5170 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5178 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5179 ++UseIdx, ++UseNo) {
5182 MI.getOperand(UseIdx));
5191 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5195 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5197 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5198 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5201 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5202 Uses.push_back(InputOpsPieces[InputNo][i]);
5205 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5206 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5211 for (
unsigned i = 0; i < NumDefs; ++i)
5212 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5214 for (
unsigned i = 0; i < NumDefs; ++i)
5215 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5218 MI.eraseFromParent();
5225 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5227 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5228 unsigned NumDefs =
MI.getNumDefs();
5232 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5237 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5238 UseIdx += 2, ++UseNo) {
5246 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5248 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5249 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5251 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5254 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5255 Phi.addUse(InputOpsPieces[j][i]);
5256 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5266 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5268 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5271 MI.eraseFromParent();
5279 const int NumDst =
MI.getNumOperands() - 1;
5280 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5281 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5282 LLT SrcTy = MRI.getType(SrcReg);
5284 if (TypeIdx != 1 || NarrowTy == DstTy)
5291 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5294 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5308 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5309 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5310 const int PartsPerUnmerge = NumDst / NumUnmerge;
5312 for (
int I = 0;
I != NumUnmerge; ++
I) {
5313 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5315 for (
int J = 0; J != PartsPerUnmerge; ++J)
5316 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5317 MIB.addUse(Unmerge.getReg(
I));
5320 MI.eraseFromParent();
5327 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5331 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5333 if (NarrowTy == SrcTy)
5341 assert(SrcTy.isVector() &&
"Expected vector types");
5343 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5357 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5358 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5359 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5365 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5366 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5367 ++i,
Offset += NumNarrowTyElts) {
5370 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5373 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5374 MI.eraseFromParent();
5378 assert(TypeIdx == 0 &&
"Bad type index");
5379 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5394 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5395 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5397 for (
unsigned i = 0; i < NumParts; ++i) {
5399 for (
unsigned j = 0; j < NumElts; ++j)
5400 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5402 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5405 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5406 MI.eraseFromParent();
5414 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5416 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5418 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5420 InsertVal =
MI.getOperand(2).getReg();
5422 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5423 LLT VecTy = MRI.getType(SrcVec);
5429 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5433 MI.eraseFromParent();
5442 SplitPieces[IdxVal] = InsertVal;
5443 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5445 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5449 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5452 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5453 TargetOpcode::G_ANYEXT);
5457 LLT IdxTy = MRI.getType(Idx);
5458 int64_t PartIdx = IdxVal / NewNumElts;
5460 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5463 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5466 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5467 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5468 VecParts[PartIdx] = InsertPart.getReg(0);
5472 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5474 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5478 MI.eraseFromParent();
5498 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5510 LLT ValTy = MRI.getType(ValReg);
5519 int NumLeftover = -1;
5525 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5527 NumParts = NarrowRegs.
size();
5528 NumLeftover = NarrowLeftoverRegs.
size();
5535 LLT PtrTy = MRI.getType(AddrReg);
5545 auto MMO = LdStMI.
getMMO();
5547 unsigned NumParts,
unsigned Offset) ->
unsigned {
5550 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5552 unsigned ByteOffset =
Offset / 8;
5555 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5562 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5563 ValRegs.push_back(Dst);
5564 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5566 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5575 unsigned HandledOffset =
5576 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5580 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5583 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5584 LeftoverTy, NarrowLeftoverRegs);
5598 switch (
MI.getOpcode()) {
5599 case G_IMPLICIT_DEF:
5615 case G_FCANONICALIZE:
5632 case G_INTRINSIC_LRINT:
5633 case G_INTRINSIC_LLRINT:
5634 case G_INTRINSIC_ROUND:
5635 case G_INTRINSIC_ROUNDEVEN:
5638 case G_INTRINSIC_TRUNC:
5666 case G_FMINNUM_IEEE:
5667 case G_FMAXNUM_IEEE:
5689 case G_CTLZ_ZERO_POISON:
5691 case G_CTTZ_ZERO_POISON:
5708 case G_ADDRSPACE_CAST:
5721 case G_STRICT_FLDEXP:
5723 case G_TRUNC_SSAT_S:
5724 case G_TRUNC_SSAT_U:
5725 case G_TRUNC_USAT_U:
5733 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5738 case G_UNMERGE_VALUES:
5740 case G_BUILD_VECTOR:
5741 assert(TypeIdx == 0 &&
"not a vector type index");
5743 case G_CONCAT_VECTORS:
5747 case G_EXTRACT_VECTOR_ELT:
5748 case G_INSERT_VECTOR_ELT:
5757 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5758 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5760 case G_SHUFFLE_VECTOR:
5766 case G_INTRINSIC_FPTRUNC_ROUND:
5776 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5777 "Not a bitcast operation");
5782 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5784 unsigned NewElemCount =
5787 if (NewElemCount == 1) {
5790 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5797 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5806 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5807 MI.eraseFromParent();
5813 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5817 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5818 MI.getFirst3RegLLTs();
5821 if (DstTy != Src1Ty)
5823 if (DstTy != Src2Ty)
5838 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5854 unsigned InputUsed[2] = {-1U, -1U};
5855 unsigned FirstMaskIdx =
High * NewElts;
5856 bool UseBuildVector =
false;
5857 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5859 int Idx = Mask[FirstMaskIdx + MaskOffset];
5864 if (
Input >= std::size(Inputs)) {
5871 Idx -=
Input * NewElts;
5875 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5876 if (InputUsed[OpNo] ==
Input) {
5879 }
else if (InputUsed[OpNo] == -1U) {
5881 InputUsed[OpNo] =
Input;
5886 if (OpNo >= std::size(InputUsed)) {
5889 UseBuildVector =
true;
5894 Ops.push_back(Idx + OpNo * NewElts);
5897 if (UseBuildVector) {
5902 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5904 int Idx = Mask[FirstMaskIdx + MaskOffset];
5909 if (
Input >= std::size(Inputs)) {
5916 Idx -=
Input * NewElts;
5920 .buildExtractVectorElement(
5921 EltTy, Inputs[
Input],
5927 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5928 }
else if (InputUsed[0] == -1U) {
5930 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5931 }
else if (NewElts == 1) {
5932 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5934 Register Op0 = Inputs[InputUsed[0]];
5938 : Inputs[InputUsed[1]];
5940 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5947 MI.eraseFromParent();
5960 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5966 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5969 const unsigned NumParts =
5971 : SrcTy.getNumElements();
5975 if (DstTy != NarrowTy)
5981 unsigned NumPartsLeft = NumParts;
5982 while (NumPartsLeft > 1) {
5983 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5986 .buildInstr(ScalarOpc, {NarrowTy},
5987 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5990 SplitSrcs = PartialResults;
5991 PartialResults.
clear();
5992 NumPartsLeft = SplitSrcs.
size();
5996 MI.eraseFromParent();
6001 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
6002 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
6005 MI.eraseFromParent();
6009 for (
unsigned Part = 0; Part < NumParts; ++Part) {
6011 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
6019 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
6022 Register Acc = PartialReductions[0];
6023 for (
unsigned Part = 1; Part < NumParts; ++Part) {
6024 if (Part == NumParts - 1) {
6026 {Acc, PartialReductions[Part]});
6029 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
6033 MI.eraseFromParent();
6039 unsigned int TypeIdx,
6041 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6042 MI.getFirst3RegLLTs();
6043 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6047 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6048 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6049 "Unexpected vecreduce opcode");
6050 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6051 ? TargetOpcode::G_FADD
6052 : TargetOpcode::G_FMUL;
6055 unsigned NumParts = SrcTy.getNumElements();
6058 for (
unsigned i = 0; i < NumParts; i++)
6059 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6063 MI.eraseFromParent();
6070 unsigned ScalarOpc) {
6078 while (SplitSrcs.
size() > 1) {
6080 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6088 SplitSrcs = std::move(PartialRdxs);
6092 MI.getOperand(1).setReg(SplitSrcs[0]);
6099 const LLT HalfTy,
const LLT AmtTy) {
6101 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6102 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6106 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6107 MI.eraseFromParent();
6113 unsigned VTBits = 2 * NVTBits;
6116 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6117 if (Amt.
ugt(VTBits)) {
6119 }
else if (Amt.
ugt(NVTBits)) {
6122 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6123 }
else if (Amt == NVTBits) {
6131 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6134 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6135 if (Amt.
ugt(VTBits)) {
6137 }
else if (Amt.
ugt(NVTBits)) {
6139 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6141 }
else if (Amt == NVTBits) {
6145 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6147 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6149 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6155 if (Amt.
ugt(VTBits)) {
6157 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6158 }
else if (Amt.
ugt(NVTBits)) {
6160 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6162 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6163 }
else if (Amt == NVTBits) {
6166 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6168 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6170 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6172 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6179 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6180 MI.eraseFromParent();
6196 LLT DstTy = MRI.getType(DstReg);
6201 LLT ShiftAmtTy = MRI.getType(Amt);
6203 if (DstEltSize % 2 != 0)
6219 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6230 const unsigned NewBitSize = DstEltSize / 2;
6242 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6244 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6245 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6248 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6249 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6251 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6256 switch (
MI.getOpcode()) {
6257 case TargetOpcode::G_SHL: {
6259 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6261 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6262 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6263 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6266 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6267 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6269 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6271 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6273 ResultRegs[0] =
Lo.getReg(0);
6274 ResultRegs[1] =
Hi.getReg(0);
6277 case TargetOpcode::G_LSHR:
6278 case TargetOpcode::G_ASHR: {
6280 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6282 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6283 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6284 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6288 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6291 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6292 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6294 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6298 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6300 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6302 ResultRegs[0] =
Lo.getReg(0);
6303 ResultRegs[1] =
Hi.getReg(0);
6310 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6311 MI.eraseFromParent();
6320 LLT TargetTy,
LLT ShiftAmtTy) {
6323 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6325 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6326 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6327 const bool NeedsInterWordShift = ShiftBits != 0;
6330 case TargetOpcode::G_SHL: {
6333 if (PartIdx < ShiftWords)
6336 unsigned SrcIdx = PartIdx - ShiftWords;
6337 if (!NeedsInterWordShift)
6338 return SrcParts[SrcIdx];
6343 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6347 return Hi.getReg(0);
6350 case TargetOpcode::G_LSHR: {
6351 unsigned SrcIdx = PartIdx + ShiftWords;
6352 if (SrcIdx >= NumParts)
6354 if (!NeedsInterWordShift)
6355 return SrcParts[SrcIdx];
6359 if (SrcIdx + 1 < NumParts) {
6360 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6364 return Lo.getReg(0);
6367 case TargetOpcode::G_ASHR: {
6369 unsigned SrcIdx = PartIdx + ShiftWords;
6370 if (SrcIdx >= NumParts)
6372 if (!NeedsInterWordShift)
6373 return SrcParts[SrcIdx];
6378 (SrcIdx == NumParts - 1)
6382 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6404 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6405 ?
static_cast<unsigned>(TargetOpcode::G_LSHR)
6410 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6419 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6420 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6422 auto IsZeroBitShift =
6430 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6431 : TargetOpcode::G_SHL;
6434 auto TargetBitsConst =
6436 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6441 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6446 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6448 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6452 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6465 LLT DstTy = MRI.getType(DstReg);
6469 const unsigned NumParts = DstBits / TargetBits;
6471 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6481 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6482 MI.eraseFromParent();
6487 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6488 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6494 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6498 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6501 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6502 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6506 for (
unsigned I = 0;
I < NumParts; ++
I)
6508 Params, TargetTy, ShiftAmtTy);
6510 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6511 MI.eraseFromParent();
6520 LLT DstTy = MRI.getType(DstReg);
6521 LLT ShiftAmtTy = MRI.getType(AmtReg);
6525 const unsigned NumParts = DstBits / TargetBits;
6527 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6544 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6556 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6557 auto TargetBitsLog2Const =
6558 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6559 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6562 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6564 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6572 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6573 auto TargetBitsMinusOneConst =
6574 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6576 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6577 TargetBitsMinusOneConst)
6580 FillValue = ZeroReg;
6588 for (
unsigned I = 0;
I < NumParts; ++
I) {
6590 Register InBoundsResult = FillValue;
6600 for (
unsigned K = 0; K < NumParts; ++K) {
6601 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6603 WordShift, WordShiftKConst);
6615 switch (
MI.getOpcode()) {
6616 case TargetOpcode::G_SHL:
6617 MainSrcIdx = (int)
I - (
int)K;
6618 CarrySrcIdx = MainSrcIdx - 1;
6620 case TargetOpcode::G_LSHR:
6621 case TargetOpcode::G_ASHR:
6622 MainSrcIdx = (int)
I + (
int)K;
6623 CarrySrcIdx = MainSrcIdx + 1;
6631 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6632 Register MainOp = SrcParts[MainSrcIdx];
6636 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6637 CarryOp = SrcParts[CarrySrcIdx];
6638 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6639 CarrySrcIdx >= (
int)NumParts)
6640 CarryOp = FillValue;
6646 ResultForK = FillValue;
6652 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6659 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6663 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6664 MI.eraseFromParent();
6671 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6674 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6689 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6694 "getNeutralElementForVecReduce called with invalid opcode!");
6695 case TargetOpcode::G_VECREDUCE_ADD:
6696 case TargetOpcode::G_VECREDUCE_OR:
6697 case TargetOpcode::G_VECREDUCE_XOR:
6698 case TargetOpcode::G_VECREDUCE_UMAX:
6700 case TargetOpcode::G_VECREDUCE_MUL:
6702 case TargetOpcode::G_VECREDUCE_AND:
6703 case TargetOpcode::G_VECREDUCE_UMIN:
6706 case TargetOpcode::G_VECREDUCE_SMAX:
6709 case TargetOpcode::G_VECREDUCE_SMIN:
6712 case TargetOpcode::G_VECREDUCE_FADD:
6714 case TargetOpcode::G_VECREDUCE_FMUL:
6716 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6717 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6718 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6719 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6727 unsigned Opc =
MI.getOpcode();
6729 case TargetOpcode::G_IMPLICIT_DEF:
6730 case TargetOpcode::G_LOAD: {
6738 case TargetOpcode::G_STORE:
6745 case TargetOpcode::G_AND:
6746 case TargetOpcode::G_OR:
6747 case TargetOpcode::G_XOR:
6748 case TargetOpcode::G_ADD:
6749 case TargetOpcode::G_SUB:
6750 case TargetOpcode::G_MUL:
6751 case TargetOpcode::G_FADD:
6752 case TargetOpcode::G_FSUB:
6753 case TargetOpcode::G_FMUL:
6754 case TargetOpcode::G_FDIV:
6755 case TargetOpcode::G_FCOPYSIGN:
6756 case TargetOpcode::G_UADDSAT:
6757 case TargetOpcode::G_USUBSAT:
6758 case TargetOpcode::G_SADDSAT:
6759 case TargetOpcode::G_SSUBSAT:
6760 case TargetOpcode::G_SMIN:
6761 case TargetOpcode::G_SMAX:
6762 case TargetOpcode::G_UMIN:
6763 case TargetOpcode::G_UMAX:
6764 case TargetOpcode::G_FMINNUM:
6765 case TargetOpcode::G_FMAXNUM:
6766 case TargetOpcode::G_FMINNUM_IEEE:
6767 case TargetOpcode::G_FMAXNUM_IEEE:
6768 case TargetOpcode::G_FMINIMUM:
6769 case TargetOpcode::G_FMAXIMUM:
6770 case TargetOpcode::G_FMINIMUMNUM:
6771 case TargetOpcode::G_FMAXIMUMNUM:
6772 case TargetOpcode::G_STRICT_FADD:
6773 case TargetOpcode::G_STRICT_FSUB:
6774 case TargetOpcode::G_STRICT_FMUL: {
6782 case TargetOpcode::G_SHL:
6783 case TargetOpcode::G_ASHR:
6784 case TargetOpcode::G_LSHR: {
6790 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6796 case TargetOpcode::G_FMA:
6797 case TargetOpcode::G_STRICT_FMA:
6798 case TargetOpcode::G_FSHR:
6799 case TargetOpcode::G_FSHL: {
6808 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6809 case TargetOpcode::G_EXTRACT:
6816 case TargetOpcode::G_INSERT:
6817 case TargetOpcode::G_INSERT_VECTOR_ELT:
6818 case TargetOpcode::G_FREEZE:
6819 case TargetOpcode::G_FNEG:
6820 case TargetOpcode::G_FABS:
6821 case TargetOpcode::G_FSQRT:
6822 case TargetOpcode::G_FCEIL:
6823 case TargetOpcode::G_FFLOOR:
6824 case TargetOpcode::G_FNEARBYINT:
6825 case TargetOpcode::G_FRINT:
6826 case TargetOpcode::G_INTRINSIC_ROUND:
6827 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6828 case TargetOpcode::G_INTRINSIC_TRUNC:
6829 case TargetOpcode::G_BITREVERSE:
6830 case TargetOpcode::G_BSWAP:
6831 case TargetOpcode::G_FCANONICALIZE:
6832 case TargetOpcode::G_SEXT_INREG:
6833 case TargetOpcode::G_ABS:
6834 case TargetOpcode::G_CTLZ:
6835 case TargetOpcode::G_CTPOP:
6843 case TargetOpcode::G_SELECT: {
6844 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6846 if (!CondTy.isScalar() ||
6852 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6854 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6859 if (CondTy.isVector())
6869 case TargetOpcode::G_UNMERGE_VALUES:
6871 case TargetOpcode::G_PHI:
6873 case TargetOpcode::G_SHUFFLE_VECTOR:
6875 case TargetOpcode::G_BUILD_VECTOR: {
6877 for (
auto Op :
MI.uses()) {
6885 MIRBuilder.buildDeleteTrailingVectorElements(
6886 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6887 MI.eraseFromParent();
6890 case TargetOpcode::G_SEXT:
6891 case TargetOpcode::G_ZEXT:
6892 case TargetOpcode::G_ANYEXT:
6893 case TargetOpcode::G_TRUNC:
6894 case TargetOpcode::G_FPTRUNC:
6895 case TargetOpcode::G_FPEXT:
6896 case TargetOpcode::G_FPTOSI:
6897 case TargetOpcode::G_FPTOUI:
6898 case TargetOpcode::G_FPTOSI_SAT:
6899 case TargetOpcode::G_FPTOUI_SAT:
6900 case TargetOpcode::G_SITOFP:
6901 case TargetOpcode::G_UITOFP: {
6908 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6911 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6919 case TargetOpcode::G_ICMP:
6920 case TargetOpcode::G_FCMP: {
6928 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6933 case TargetOpcode::G_BITCAST: {
6937 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6938 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
6954 case TargetOpcode::G_VECREDUCE_FADD:
6955 case TargetOpcode::G_VECREDUCE_FMUL:
6956 case TargetOpcode::G_VECREDUCE_ADD:
6957 case TargetOpcode::G_VECREDUCE_MUL:
6958 case TargetOpcode::G_VECREDUCE_AND:
6959 case TargetOpcode::G_VECREDUCE_OR:
6960 case TargetOpcode::G_VECREDUCE_XOR:
6961 case TargetOpcode::G_VECREDUCE_SMAX:
6962 case TargetOpcode::G_VECREDUCE_SMIN:
6963 case TargetOpcode::G_VECREDUCE_UMAX:
6964 case TargetOpcode::G_VECREDUCE_UMIN: {
6965 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
6967 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6968 auto NeutralElement = getNeutralElementForVecReduce(
6974 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
6975 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6976 NeutralElement, Idx);
6980 MO.
setReg(NewVec.getReg(0));
6992 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6994 unsigned MaskNumElts = Mask.size();
6995 unsigned SrcNumElts = SrcTy.getNumElements();
6998 if (MaskNumElts == SrcNumElts)
7001 if (MaskNumElts < SrcNumElts) {
7009 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7010 MI.getOperand(1).getReg(),
7011 MI.getOperand(2).getReg(), NewMask);
7012 MI.eraseFromParent();
7017 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
7018 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
7027 MOps1[0] =
MI.getOperand(1).getReg();
7028 MOps2[0] =
MI.getOperand(2).getReg();
7030 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
7031 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
7035 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
7037 if (Idx >=
static_cast<int>(SrcNumElts))
7038 Idx += PaddedMaskNumElts - SrcNumElts;
7043 if (MaskNumElts != PaddedMaskNumElts) {
7045 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7048 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
7050 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
7055 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7058 MI.eraseFromParent();
7064 unsigned int TypeIdx,
LLT MoreTy) {
7065 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
7067 unsigned NumElts = DstTy.getNumElements();
7070 if (DstTy.isVector() && Src1Ty.isVector() &&
7071 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7079 if (DstTy != Src1Ty || DstTy != Src2Ty)
7087 for (
unsigned I = 0;
I != NumElts; ++
I) {
7089 if (Idx <
static_cast<int>(NumElts))
7092 NewMask[
I] = Idx - NumElts + WidenNumElts;
7096 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7097 MI.getOperand(1).getReg(),
7098 MI.getOperand(2).getReg(), NewMask);
7099 MI.eraseFromParent();
7108 unsigned SrcParts = Src1Regs.
size();
7109 unsigned DstParts = DstRegs.
size();
7111 unsigned DstIdx = 0;
7113 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7114 DstRegs[DstIdx] = FactorSum;
7119 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7121 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7122 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7124 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7128 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7129 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7131 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7141 if (DstIdx != DstParts - 1) {
7142 MachineInstrBuilder Uaddo =
7143 B.buildUAddo(NarrowTy,
LLT::integer(1), Factors[0], Factors[1]);
7144 FactorSum = Uaddo.
getReg(0);
7145 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7146 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7147 MachineInstrBuilder Uaddo =
7148 B.buildUAddo(NarrowTy,
LLT::integer(1), FactorSum, Factors[i]);
7149 FactorSum = Uaddo.
getReg(0);
7150 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7151 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7155 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7156 for (
unsigned i = 2; i < Factors.
size(); ++i)
7157 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7160 CarrySumPrevDstIdx = CarrySum;
7161 DstRegs[DstIdx] = FactorSum;
7173 LLT DstType = MRI.getType(DstReg);
7175 if (DstType.isVector())
7178 unsigned Opcode =
MI.getOpcode();
7179 unsigned OpO, OpE, OpF;
7181 case TargetOpcode::G_SADDO:
7182 case TargetOpcode::G_SADDE:
7183 case TargetOpcode::G_UADDO:
7184 case TargetOpcode::G_UADDE:
7185 case TargetOpcode::G_ADD:
7186 OpO = TargetOpcode::G_UADDO;
7187 OpE = TargetOpcode::G_UADDE;
7188 OpF = TargetOpcode::G_UADDE;
7189 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7190 OpF = TargetOpcode::G_SADDE;
7192 case TargetOpcode::G_SSUBO:
7193 case TargetOpcode::G_SSUBE:
7194 case TargetOpcode::G_USUBO:
7195 case TargetOpcode::G_USUBE:
7196 case TargetOpcode::G_SUB:
7197 OpO = TargetOpcode::G_USUBO;
7198 OpE = TargetOpcode::G_USUBE;
7199 OpF = TargetOpcode::G_USUBE;
7200 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7201 OpF = TargetOpcode::G_SSUBE;
7208 unsigned NumDefs =
MI.getNumExplicitDefs();
7209 Register Src1 =
MI.getOperand(NumDefs).getReg();
7210 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7213 CarryDst =
MI.getOperand(1).getReg();
7214 if (
MI.getNumOperands() == NumDefs + 3)
7215 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7217 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7218 LLT LeftoverTy, DummyTy;
7220 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7225 int NarrowParts = Src1Regs.
size();
7226 Src1Regs.
append(Src1Left);
7227 Src2Regs.
append(Src2Left);
7230 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7232 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7235 if (i == e - 1 && CarryDst)
7236 CarryOut = CarryDst;
7238 CarryOut = MRI.createGenericVirtualRegister(
LLT::integer(1));
7241 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7242 {Src1Regs[i], Src2Regs[i]});
7243 }
else if (i == e - 1) {
7244 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7245 {Src1Regs[i], Src2Regs[i], CarryIn});
7247 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7248 {Src1Regs[i], Src2Regs[i], CarryIn});
7254 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7255 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7256 ArrayRef(DstRegs).drop_front(NarrowParts));
7258 MI.eraseFromParent();
7264 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7266 LLT Ty = MRI.getType(DstReg);
7270 unsigned Size = Ty.getSizeInBits();
7272 if (
Size % NarrowSize != 0)
7275 unsigned NumParts =
Size / NarrowSize;
7276 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7277 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7283 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7287 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7288 MI.eraseFromParent();
7298 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7301 LLT SrcTy = MRI.getType(Src);
7312 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7325 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7328 if (SizeOp1 % NarrowSize != 0)
7330 int NumParts = SizeOp1 / NarrowSize;
7333 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7337 uint64_t OpStart =
MI.getOperand(2).getImm();
7338 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7339 for (
int i = 0; i < NumParts; ++i) {
7340 unsigned SrcStart = i * NarrowSize;
7342 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7345 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7353 int64_t ExtractOffset;
7355 if (OpStart < SrcStart) {
7357 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7359 ExtractOffset = OpStart - SrcStart;
7360 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7364 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7366 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7367 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7374 if (MRI.getType(DstReg).isVector())
7375 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7376 else if (DstRegs.
size() > 1)
7377 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7380 MI.eraseFromParent();
7392 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7394 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7397 SrcRegs.
append(LeftoverRegs);
7401 uint64_t OpStart =
MI.getOperand(3).getImm();
7402 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7403 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7404 unsigned DstStart =
I * NarrowSize;
7406 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7414 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7416 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7420 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7428 int64_t ExtractOffset, InsertOffset;
7430 if (OpStart < DstStart) {
7432 ExtractOffset = DstStart - OpStart;
7433 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7435 InsertOffset = OpStart - DstStart;
7438 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7442 if (ExtractOffset != 0 || SegSize != OpSize) {
7444 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7445 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7448 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7449 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7457 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7460 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7462 MI.eraseFromParent();
7470 LLT DstTy = MRI.getType(DstReg);
7472 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7478 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7479 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7483 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7484 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7487 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7488 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7489 {Src0Regs[I], Src1Regs[I]});
7493 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7496 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7497 DstLeftoverRegs.
push_back(Inst.getReg(0));
7500 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7501 LeftoverTy, DstLeftoverRegs);
7503 MI.eraseFromParent();
7513 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7515 LLT DstTy = MRI.getType(DstReg);
7520 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7521 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7522 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7524 MI.eraseFromParent();
7534 Register CondReg =
MI.getOperand(1).getReg();
7535 LLT CondTy = MRI.getType(CondReg);
7536 if (CondTy.isVector())
7540 LLT DstTy = MRI.getType(DstReg);
7546 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7547 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7551 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7552 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7555 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7557 CondReg, Src1Regs[
I], Src2Regs[
I]);
7561 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7563 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7567 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7568 LeftoverTy, DstLeftoverRegs);
7570 MI.eraseFromParent();
7580 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7583 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7584 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON;
7587 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7589 auto C_0 =
B.buildConstant(NarrowTy, 0);
7591 UnmergeSrc.getReg(1), C_0);
7592 auto LoCTLZ = IsUndef ?
B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0))
7593 :
B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7594 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7595 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7596 auto HiCTLZ =
B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1));
7597 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7599 MI.eraseFromParent();
7612 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7615 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7616 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_POISON;
7619 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7621 auto C_0 =
B.buildConstant(NarrowTy, 0);
7623 UnmergeSrc.getReg(0), C_0);
7624 auto HiCTTZ = IsUndef ?
B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1))
7625 :
B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7626 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7627 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7628 auto LoCTTZ =
B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0));
7629 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7631 MI.eraseFromParent();
7644 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7647 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7652 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7656 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7657 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7665 auto LoInv =
B.buildXor(DstTy,
Lo, Sign);
7666 auto LoCTLZ =
B.buildCTLZ(DstTy, LoInv);
7669 auto C_NarrowSizeM1 =
B.buildConstant(DstTy, NarrowSize - 1);
7670 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7672 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7674 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7676 MI.eraseFromParent();
7686 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7689 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7690 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7692 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7693 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7694 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7696 MI.eraseFromParent();
7711 LLT ExpTy = MRI.getType(ExpReg);
7716 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7717 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7718 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7719 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7721 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7723 MI.getOperand(2).setReg(Trunc.getReg(0));
7730 unsigned Opc =
MI.getOpcode();
7733 auto QAction = LI.getAction(Q).Action;
7739 case TargetOpcode::G_CTLZ_ZERO_POISON: {
7742 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7746 case TargetOpcode::G_CTLZ: {
7747 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7748 unsigned Len = SrcTy.getScalarSizeInBits();
7750 if (isSupported({TargetOpcode::G_CTLZ_ZERO_POISON, {DstTy, SrcTy}})) {
7752 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_POISON(DstTy, SrcReg);
7753 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7756 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7757 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7758 MI.eraseFromParent();
7774 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7775 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7778 Op = MIBOp.getReg(0);
7783 MI.eraseFromParent();
7786 case TargetOpcode::G_CTTZ_ZERO_POISON: {
7789 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7793 case TargetOpcode::G_CTTZ: {
7794 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7796 unsigned Len = SrcTy.getScalarSizeInBits();
7797 if (isSupported({TargetOpcode::G_CTTZ_ZERO_POISON, {DstTy, SrcTy}})) {
7800 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_POISON(DstTy, SrcReg);
7801 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7804 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7805 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7806 MI.eraseFromParent();
7813 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7814 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7816 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7817 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7818 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7819 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7822 MI.eraseFromParent();
7826 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7827 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7831 case TargetOpcode::G_CTPOP: {
7833 LLT Ty = MRI.getType(SrcReg);
7834 unsigned Size = Ty.getScalarSizeInBits();
7846 auto C_1 =
B.buildConstant(Ty, 1);
7847 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7849 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7850 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7851 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7855 auto C_2 =
B.buildConstant(Ty, 2);
7856 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7858 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7859 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7860 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7861 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7868 auto C_4 =
B.buildConstant(Ty, 4);
7869 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7870 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7872 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7873 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7875 assert(
Size <= 128 &&
"Scalar size is too large for CTPOP lower algorithm");
7878 if (
Size == 16 && !Ty.isVector()) {
7880 auto C_8 =
B.buildConstant(Ty, 8);
7881 auto HighSum =
B.buildLShr(Ty, B8Count, C_8);
7882 auto Res =
B.buildAdd(Ty, B8Count, HighSum);
7883 B.buildAnd(
MI.getOperand(0).getReg(), Res,
B.buildConstant(Ty, 0xFF));
7884 MI.eraseFromParent();
7893 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7895 auto IsMulSupported = [
this](
const LLT Ty) {
7896 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7899 if (IsMulSupported(Ty)) {
7900 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7901 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7903 auto ResTmp = B8Count;
7904 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7905 auto ShiftC =
B.buildConstant(Ty, Shift);
7906 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7907 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7909 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7911 MI.eraseFromParent();
7914 case TargetOpcode::G_CTLS: {
7915 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7919 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7920 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7922 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7928 MI.eraseFromParent();
7949 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7950 LLT Ty = MRI.getType(Dst);
7951 LLT ShTy = MRI.getType(Z);
7958 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7959 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7964 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
7965 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7969 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7982 MI.eraseFromParent();
7988 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7989 LLT Ty = MRI.getType(Dst);
7990 LLT ShTy = MRI.getType(Z);
7993 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8003 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8004 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8005 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
8006 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
8007 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
8011 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
8014 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
8017 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
8019 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8020 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8021 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
8024 auto One =
MIRBuilder.buildConstant(ShTy, 1);
8026 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
8028 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
8031 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
8032 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
8037 MI.eraseFromParent();
8048 LLT Ty = MRI.getType(Dst);
8049 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
8051 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8052 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8055 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
8056 return lowerFunnelShiftAsShifts(
MI);
8060 if (Result == UnableToLegalize)
8061 return lowerFunnelShiftAsShifts(
MI);
8066 auto [Dst, Src] =
MI.getFirst2Regs();
8067 LLT DstTy = MRI.getType(Dst);
8068 LLT SrcTy = MRI.getType(Src);
8072 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8080 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8084 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8088 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8093 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8094 {UnmergeSrc.getReg(0)});
8095 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8096 {UnmergeSrc.getReg(1)});
8099 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8101 MI.eraseFromParent();
8118 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8122 LLT DstTy = MRI.getType(DstReg);
8123 LLT SrcTy = MRI.getType(SrcReg);
8131 SrcTy.getElementCount().divideCoefficientBy(2));
8144 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8156 MI.eraseFromParent();
8165 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8166 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8167 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8168 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8169 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8170 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8171 MI.eraseFromParent();
8176 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8178 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8179 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8184 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8185 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8187 return lowerRotateWithReverseRotate(
MI);
8190 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8191 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8192 bool IsFShLegal =
false;
8193 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8194 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8198 MI.eraseFromParent();
8203 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8206 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8211 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8212 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8213 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8219 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8220 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8222 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8228 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8229 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8231 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8233 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8238 MI.eraseFromParent();
8246 auto [Dst, Src] =
MI.getFirst2Regs();
8251 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8279 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8292 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8296 MI.eraseFromParent();
8304 auto [Dst, Src] =
MI.getFirst2Regs();
8309 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8322 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8324 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8329 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8331 MI.eraseFromParent();
8339 auto [Dst, Src] =
MI.getFirst2Regs();
8343 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S64);
8354 auto TwoP52 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4330000000000000));
8355 auto TwoP84 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4530000000000000));
8357 auto TwoP52P84FP =
MIRBuilder.buildFConstant(
S64, TwoP52P84);
8364 auto HighBitsFP =
MIRBuilder.buildOr(
S64, TwoP84, HighBits);
8365 auto Scratch =
MIRBuilder.buildFSub(
S64, HighBitsFP, TwoP52P84FP);
8366 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8368 MI.eraseFromParent();
8379 SrcTy.changeElementType(
LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
8380 auto M1 =
MI.getOpcode() == TargetOpcode::G_UITOFP
8386 MI.eraseFromParent();
8391 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8394 auto True =
MIRBuilder.buildFConstant(DstTy, 1.0);
8395 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8396 MIRBuilder.buildSelect(Dst, Src, True, False);
8397 MI.eraseFromParent();
8401 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8421 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8428 auto True =
MIRBuilder.buildFConstant(DstTy, -1.0);
8429 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8430 MIRBuilder.buildSelect(Dst, Src, True, False);
8431 MI.eraseFromParent();
8435 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8441 if (DstTy.getScalarSizeInBits() == 32) {
8448 auto SignBit =
MIRBuilder.buildConstant(I64, 63);
8449 auto S =
MIRBuilder.buildAShr(I64, L, SignBit);
8451 auto LPlusS =
MIRBuilder.buildAdd(I64, L, S);
8458 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8459 MI.eraseFromParent();
8467 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8471 if (SrcTy !=
S64 && SrcTy !=
S32)
8473 if (DstTy !=
S32 && DstTy !=
S64)
8500 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8502 MI.eraseFromParent();
8507 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8512 if (SrcTy.getScalarType() !=
S32 || DstTy.getScalarType() !=
S64)
8519 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8521 auto ExponentMask =
MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8522 auto ExponentLoBit =
MIRBuilder.buildConstant(SrcTy, 23);
8524 auto AndExpMask =
MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8525 auto ExponentBits =
MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8527 auto SignMask =
MIRBuilder.buildConstant(SrcTy,
8529 auto AndSignMask =
MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8530 auto SignLowBit =
MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8531 auto Sign =
MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8534 auto MantissaMask =
MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8535 auto AndMantissaMask =
MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8536 auto K =
MIRBuilder.buildConstant(SrcTy, 0x00800000);
8538 auto R =
MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8541 auto Bias =
MIRBuilder.buildConstant(SrcTy, 127);
8546 auto Shl =
MIRBuilder.buildShl(DstTy, R, SubExponent);
8547 auto Srl =
MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8553 R =
MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8555 auto XorSign =
MIRBuilder.buildXor(DstTy, R, Sign);
8556 auto Ret =
MIRBuilder.buildSub(DstTy, XorSign, Sign);
8558 auto ZeroSrcTy =
MIRBuilder.buildConstant(SrcTy, 0);
8563 auto ZeroDstTy =
MIRBuilder.buildConstant(DstTy, 0);
8564 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8566 MI.eraseFromParent();
8572 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8574 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8575 unsigned SatWidth = DstTy.getScalarSizeInBits();
8579 APInt MinInt, MaxInt;
8602 if (AreExactFloatBounds) {
8604 auto MaxC =
MIRBuilder.buildFConstant(SrcTy, MinFloat);
8607 auto Max =
MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8609 auto MinC =
MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8618 MI.eraseFromParent();
8623 auto FpToInt =
MIRBuilder.buildFPTOSI(DstTy, Min);
8628 MI.eraseFromParent();
8635 auto FpToInt = IsSigned ?
MIRBuilder.buildFPTOSI(DstTy, Src)
8643 DstTy, ULT,
MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8653 MI.eraseFromParent();
8659 DstTy, OGT,
MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8663 MI.eraseFromParent();
8670 assert((
MI.getOpcode() == TargetOpcode::G_FPEXT ||
8671 MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
8672 "Only G_FPEXT and G_FPTRUNC are expected");
8674 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8679 if (
MI.getOpcode() == TargetOpcode::G_FPEXT) {
8681 StoreOpc = TargetOpcode::G_STORE;
8682 LoadOpc = TargetOpcode::G_FPEXTLOAD;
8685 StoreOpc = TargetOpcode::G_FPTRUNCSTORE;
8686 LoadOpc = TargetOpcode::G_LOAD;
8695 StackTy, StackTyAlign);
8696 MIRBuilder.buildStoreInstr(StoreOpc, SrcReg, StackTemp, *StoreMMO);
8699 StackTy, StackTyAlign);
8700 MIRBuilder.buildLoadInstr(LoadOpc, DstReg, StackTemp, *LoadMMO);
8702 MI.eraseFromParent();
8712 auto [Dst, Src] =
MI.getFirst2Regs();
8716 if (MRI.getType(Src).isVector())
8720 unsigned Flags =
MI.getFlags();
8723 MI.eraseFromParent();
8727 const unsigned ExpMask = 0x7ff;
8728 const unsigned ExpBiasf64 = 1023;
8729 const unsigned ExpBiasf16 = 15;
8758 auto SelectCC =
MIRBuilder.buildSelect(
S32, CmpM_NE0, Bits0x200, Zero);
8818 MI.eraseFromParent();
8825 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8835 auto SrcI =
MIRBuilder.buildBitcast(I32Ty, SrcReg);
8857 auto Trunc =
MIRBuilder.buildTrunc(I16Ty, Srl);
8859 MI.eraseFromParent();
8865 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8866 if (DstTy.getScalarType().isFloat16() && SrcTy.getScalarType().isFloat64())
8869 if (DstTy.getScalarType().isBFloat16() && SrcTy.getScalarType().isFloat32())
8876 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8877 LLT Ty = MRI.getType(Dst);
8879 auto CvtSrc1 =
MIRBuilder.buildSITOFP(Ty, Src1);
8880 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1,
MI.getFlags());
8881 MI.eraseFromParent();
8886 auto [DstFrac, DstInt, Src] =
MI.getFirst3Regs();
8887 LLT Ty = MRI.getType(Src);
8888 auto Flags =
MI.getFlags();
8896 FracToUse = FracPart.getReg(0);
8898 auto Abs =
MIRBuilder.buildFAbs(Ty, Src, Flags);
8902 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
8904 FracToUse =
Select.getReg(0);
8907 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8910 MI.eraseFromParent();
8916 case TargetOpcode::G_SMIN:
8918 case TargetOpcode::G_SMAX:
8920 case TargetOpcode::G_UMIN:
8922 case TargetOpcode::G_UMAX:
8930 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8935 auto Cmp =
MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8936 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8938 MI.eraseFromParent();
8947 LLT DstTy = MRI.getType(Dst);
8948 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8958 auto Zero =
MIRBuilder.buildConstant(DstTy, 0);
8959 auto IsGT =
MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8961 auto IsLT =
MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8964 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
8965 auto BC = TLI.getBooleanContents(DstTy.
isVector(),
false);
8966 if (TLI.preferSelectsOverBooleanArithmetic(
8969 auto One =
MIRBuilder.buildConstant(DstTy, 1);
8970 auto SelectZeroOrOne =
MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8972 auto MinusOne =
MIRBuilder.buildConstant(DstTy, -1);
8973 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8979 unsigned BoolExtOp =
8981 IsGT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8982 IsLT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8986 MI.eraseFromParent();
8992 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
8993 const int Src0Size = Src0Ty.getScalarSizeInBits();
8994 const int Src1Size = Src1Ty.getScalarSizeInBits();
9004 if (!(Src0Ty.getScalarType().isAnyScalar() ||
9005 Src0Ty.getScalarType().isInteger()))
9006 Src0Int =
MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);
9008 if (!(Src1Ty.getScalarType().isAnyScalar() ||
9009 Src1Ty.getScalarType().isInteger()))
9010 Src1Int =
MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);
9015 auto NotSignBitMask =
MIRBuilder.buildConstant(
9019 MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
9021 if (Src0Ty == Src1Ty) {
9022 And1 =
MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
9023 }
else if (Src0Size > Src1Size) {
9024 auto ShiftAmt =
MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
9025 auto Zext =
MIRBuilder.buildZExt(Src0IntTy, Src1Int);
9026 auto Shift =
MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
9027 And1 =
MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
9029 auto ShiftAmt =
MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
9030 auto Shift =
MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
9031 auto Trunc =
MIRBuilder.buildTrunc(Src0IntTy, Shift);
9032 And1 =
MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
9038 unsigned Flags =
MI.getFlags();
9043 if (DstTy == DstIntTy)
9044 MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
9050 MI.eraseFromParent();
9061 switch (
MI.getOpcode()) {
9062 case TargetOpcode::G_FMINNUM:
9063 NewOp = TargetOpcode::G_FMINNUM_IEEE;
9065 case TargetOpcode::G_FMINIMUMNUM:
9066 NewOp = TargetOpcode::G_FMINNUM;
9068 case TargetOpcode::G_FMAXNUM:
9069 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
9071 case TargetOpcode::G_FMAXIMUMNUM:
9072 NewOp = TargetOpcode::G_FMAXNUM;
9078 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9079 LLT Ty = MRI.getType(Dst);
9088 if (!VT->isKnownNeverSNaN(Src0))
9089 Src0 =
MIRBuilder.buildFCanonicalize(Ty, Src0,
MI.getFlags()).getReg(0);
9091 if (!VT->isKnownNeverSNaN(Src1))
9092 Src1 =
MIRBuilder.buildFCanonicalize(Ty, Src1,
MI.getFlags()).getReg(0);
9097 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1},
MI.getFlags());
9098 MI.eraseFromParent();
9104 unsigned Opc =
MI.getOpcode();
9105 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9106 LLT Ty = MRI.getType(Dst);
9109 bool IsMax = (
Opc == TargetOpcode::G_FMAXIMUM);
9111 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
9112 unsigned OpcNonIeee =
9113 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
9114 bool MinMaxMustRespectOrderedZero =
false;
9118 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
9120 MinMaxMustRespectOrderedZero =
true;
9121 }
else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
9126 Res =
MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
9131 (!VT->isKnownNeverNaN(Src0) || !VT->isKnownNeverNaN(Src1))) {
9134 LLT ElementTy = Ty.
isScalar() ? Ty : Ty.getElementType();
9138 NaN =
MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
9140 Res =
MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
9150 const unsigned Flags =
MI.getFlags();
9156 auto LHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
9158 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
9160 auto RHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
9162 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9164 Res =
MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9169 MI.eraseFromParent();
9176 LLT Ty = MRI.getType(DstReg);
9177 unsigned Flags =
MI.getFlags();
9182 MI.eraseFromParent();
9188 auto [DstReg,
X] =
MI.getFirst2Regs();
9189 const unsigned Flags =
MI.getFlags();
9190 const LLT Ty = MRI.getType(DstReg);
9202 auto AbsDiff =
MIRBuilder.buildFAbs(Ty, Diff, Flags);
9204 auto Half =
MIRBuilder.buildFConstant(Ty, 0.5);
9209 auto One =
MIRBuilder.buildFConstant(Ty, 1.0);
9210 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9211 auto BoolFP =
MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9212 auto SignedOffset =
MIRBuilder.buildFCopysign(Ty, BoolFP,
X);
9214 MIRBuilder.buildFAdd(DstReg,
T, SignedOffset, Flags);
9216 MI.eraseFromParent();
9221 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
9222 unsigned Flags =
MI.getFlags();
9223 LLT Ty = MRI.getType(DstReg);
9230 auto Trunc =
MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9231 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9234 SrcReg, Zero, Flags);
9236 SrcReg, Trunc, Flags);
9240 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9241 MI.eraseFromParent();
9247 const unsigned NumOps =
MI.getNumOperands();
9248 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
9249 unsigned PartSize = Src0Ty.getSizeInBits();
9254 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
9255 const unsigned Offset = (
I - 1) * PartSize;
9258 auto ZextInput =
MIRBuilder.buildZExt(WideTy, SrcReg);
9261 MRI.createGenericVirtualRegister(WideTy);
9264 auto Shl =
MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9265 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9266 ResultReg = NextResult;
9269 if (DstTy.isPointer()) {
9270 if (
MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9271 DstTy.getAddressSpace())) {
9279 MI.eraseFromParent();
9285 const unsigned NumDst =
MI.getNumOperands() - 1;
9286 Register SrcReg =
MI.getOperand(NumDst).getReg();
9287 Register Dst0Reg =
MI.getOperand(0).getReg();
9288 LLT DstTy = MRI.getType(Dst0Reg);
9297 LLT IntTy = MRI.getType(SrcReg);
9302 unsigned Offset = DstSize;
9303 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
9305 auto Shift =
MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9309 MI.eraseFromParent();
9328 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9329 InsertVal =
MI.getOperand(2).getReg();
9331 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
9333 LLT VecTy = MRI.getType(SrcVec);
9343 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
9344 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9346 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9349 MI.eraseFromParent();
9354 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
9365 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9372 int64_t
Offset = IdxVal * EltBytes;
9383 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9386 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9388 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9391 MI.eraseFromParent();
9397 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9398 MI.getFirst3RegLLTs();
9408 for (
int Idx : Mask) {
9410 if (!
Undef.isValid())
9416 assert(!Src0Ty.isScalar() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9418 int NumElts = Src0Ty.getNumElements();
9419 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9420 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9421 auto [It, Inserted] = CachedExtract.
try_emplace(Idx);
9423 auto IdxK =
MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9425 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9430 assert(DstTy.isVector() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9431 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9432 MI.eraseFromParent();
9438 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9439 MI.getFirst4RegLLTs();
9441 if (VecTy.isScalableVector())
9457 auto OutPos =
MIRBuilder.buildConstant(IdxTy, 0);
9460 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9463 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9466 std::optional<APInt> PassthruSplatVal =
9469 if (PassthruSplatVal.has_value()) {
9471 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9472 }
else if (HasPassthru) {
9473 auto Popcount =
MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9474 Popcount =
MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9480 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9484 unsigned NumElmts = VecTy.getNumElements();
9485 for (
unsigned I = 0;
I < NumElmts; ++
I) {
9487 auto Val =
MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9490 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9493 auto MaskI =
MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9498 OutPos =
MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9500 if (HasPassthru &&
I == NumElmts - 1) {
9503 auto AllLanesSelected =
MIRBuilder.buildICmp(
9505 OutPos =
MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9506 {OutPos, EndOfVector});
9510 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9512 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9517 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9519 MI.eraseFromParent();
9530 SPTmp =
MIRBuilder.buildCast(IntPtrTy, SPTmp);
9536 if (Alignment >
Align(1)) {
9539 auto AlignCst =
MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9548 const auto &MF = *
MI.getMF();
9554 Register AllocSize =
MI.getOperand(1).getReg();
9557 LLT PtrTy = MRI.getType(Dst);
9558 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9565 MI.eraseFromParent();
9571 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9576 MI.eraseFromParent();
9582 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9587 MI.eraseFromParent();
9593 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9594 unsigned Offset =
MI.getOperand(2).getImm();
9597 if (SrcTy.isVector()) {
9598 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9599 unsigned DstSize = DstTy.getSizeInBits();
9601 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9602 (
Offset + DstSize <= SrcTy.getSizeInBits())) {
9604 auto Unmerge =
MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9608 for (
unsigned Idx =
Offset / SrcEltSize;
9609 Idx < (
Offset + DstSize) / SrcEltSize; ++Idx) {
9610 SubVectorElts.
push_back(Unmerge.getReg(Idx));
9612 if (SubVectorElts.
size() == 1)
9613 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9615 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9617 MI.eraseFromParent();
9623 if ((SrcTy.isPointer() &&
9624 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9625 (DstTy.isPointer() &&
9626 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9627 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9631 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9632 (SrcTy.isScalar() || SrcTy.isPointer() ||
9633 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9634 LLT SrcIntTy = SrcTy;
9635 if (!SrcTy.isScalar()) {
9637 SrcReg =
MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9641 if (DstTy.isPointer())
9643 MRI.createGenericVirtualRegister(
LLT::scalar(DstTy.getSizeInBits()));
9649 auto Shr =
MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9653 if (DstTy.isPointer())
9656 MI.eraseFromParent();
9664 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
9667 LLT DstTy = MRI.getType(Src);
9668 LLT InsertTy = MRI.getType(InsertSrc);
9671 bool IsNonIntegralInsert =
9681 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9682 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9689 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9691 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, Src);
9695 for (; Idx <
Offset / EltSize; ++Idx) {
9696 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9701 auto UnmergeInsertSrc =
MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9702 for (
unsigned i = 0; Idx < (
Offset + InsertSize) / EltSize;
9704 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
9708 InsertSrc =
MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9710 InsertSrc =
MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9717 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9720 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9721 MI.eraseFromParent();
9730 if (IsNonIntegralDst || IsNonIntegralInsert) {
9731 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9735 LLT IntDstTy = DstTy;
9739 Src =
MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9744 InsertSrc =
MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9750 ExtInsSrc =
MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9756 auto Mask =
MIRBuilder.buildConstant(IntDstTy, MaskVal);
9757 auto MaskedSrc =
MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9758 auto Or =
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9761 MI.eraseFromParent();
9767 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9768 MI.getFirst4RegLLTs();
9769 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
9772 LLT BoolTy = Dst1Ty;
9774 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9789 auto ResultLowerThanLHS =
9793 MIRBuilder.buildXor(Dst1, RHSNegative, ResultLowerThanLHS);
9797 auto LHSLessThanRHS =
9799 auto ResultNegative =
9801 MIRBuilder.buildXor(Dst1, LHSLessThanRHS, ResultNegative);
9805 MI.eraseFromParent();
9811 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9812 const LLT Ty = MRI.getType(Res);
9815 auto Tmp =
MIRBuilder.buildAdd(Ty, LHS, RHS);
9816 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9817 auto Sum =
MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9828 MI.eraseFromParent();
9833 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9834 const LLT Ty = MRI.getType(Res);
9837 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9838 auto RHSPlusCI =
MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9839 auto Diff =
MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9844 auto X2 =
MIRBuilder.buildXor(Ty, LHS, Diff);
9849 MI.eraseFromParent();
9855 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9856 LLT Ty = MRI.getType(Res);
9860 switch (
MI.getOpcode()) {
9863 case TargetOpcode::G_UADDSAT:
9866 BaseOp = TargetOpcode::G_ADD;
9868 case TargetOpcode::G_SADDSAT:
9871 BaseOp = TargetOpcode::G_ADD;
9873 case TargetOpcode::G_USUBSAT:
9876 BaseOp = TargetOpcode::G_SUB;
9878 case TargetOpcode::G_SSUBSAT:
9881 BaseOp = TargetOpcode::G_SUB;
9896 uint64_t NumBits = Ty.getScalarSizeInBits();
9907 auto NegOne =
MIRBuilder.buildConstant(Ty, -1);
9915 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9920 auto Min =
MIRBuilder.buildUMin(Ty, Not, RHS);
9921 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9924 MI.eraseFromParent();
9930 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9931 LLT Ty = MRI.getType(Res);
9935 unsigned OverflowOp;
9936 switch (
MI.getOpcode()) {
9939 case TargetOpcode::G_UADDSAT:
9942 OverflowOp = TargetOpcode::G_UADDO;
9944 case TargetOpcode::G_SADDSAT:
9947 OverflowOp = TargetOpcode::G_SADDO;
9949 case TargetOpcode::G_USUBSAT:
9952 OverflowOp = TargetOpcode::G_USUBO;
9954 case TargetOpcode::G_SSUBSAT:
9957 OverflowOp = TargetOpcode::G_SSUBO;
9962 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9963 Register Tmp = OverflowRes.getReg(0);
9964 Register Ov = OverflowRes.getReg(1);
9973 uint64_t NumBits = Ty.getScalarSizeInBits();
9974 auto ShiftAmount =
MIRBuilder.buildConstant(Ty, NumBits - 1);
9975 auto Sign =
MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9978 Clamp =
MIRBuilder.buildAdd(Ty, Sign, MinVal);
9986 Clamp =
MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9990 MI.eraseFromParent();
9996 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9997 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9998 "Expected shlsat opcode!");
9999 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
10000 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
10001 LLT Ty = MRI.getType(Res);
10005 auto Result =
MIRBuilder.buildShl(Ty, LHS, RHS);
10006 auto Orig = IsSigned ?
MIRBuilder.buildAShr(Ty, Result, RHS)
10015 SatVal =
MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
10020 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
10022 MI.eraseFromParent();
10027 auto [Dst, Src] =
MI.getFirst2Regs();
10028 const LLT Ty = MRI.getType(Src);
10029 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
10030 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
10033 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt);
10034 auto LSByteShiftedLeft =
MIRBuilder.buildShl(Ty, Src, ShiftAmt);
10035 auto MSByteShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10036 auto Res =
MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
10039 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
10041 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
10042 auto Mask =
MIRBuilder.buildConstant(Ty, APMask);
10043 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
10045 auto LoByte =
MIRBuilder.buildAnd(Ty, Src, Mask);
10046 auto LoShiftedLeft =
MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
10047 Res =
MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
10049 auto SrcShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10050 auto HiShiftedRight =
MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
10051 Res =
MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
10053 Res.getInstr()->getOperand(0).setReg(Dst);
10055 MI.eraseFromParent();
10062 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
10065 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
10066 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
10067 return B.buildOr(Dst,
LHS,
RHS);
10072 auto [Dst, Src] =
MI.getFirst2Regs();
10073 const LLT SrcTy = MRI.getType(Src);
10074 unsigned Size = SrcTy.getScalarSizeInBits();
10075 unsigned VSize = SrcTy.getSizeInBits();
10078 if (SrcTy.isVector() && (VSize % 8 == 0) &&
10079 (LI.isLegal({TargetOpcode::G_BITREVERSE,
10080 {LLT::fixed_vector(VSize / 8, LLT::integer(8)),
10081 LLT::fixed_vector(VSize / 8, LLT::integer(8))}}))) {
10086 auto BSWAP =
MIRBuilder.buildBSwap(SrcTy, Src);
10087 auto Cast =
MIRBuilder.buildBitcast(VTy, BSWAP);
10088 auto RBIT =
MIRBuilder.buildBitReverse(VTy, Cast);
10092 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
10115 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
10119 Tmp2 = MIRBuilder.
buildShl(SrcTy, Src, ShAmt);
10122 Tmp2 = MIRBuilder.
buildLShr(SrcTy, Src, ShAmt);
10126 Tmp2 = MIRBuilder.
buildAnd(SrcTy, Tmp2, Mask);
10130 Tmp = MIRBuilder.
buildOr(SrcTy, Tmp, Tmp2);
10135 MI.eraseFromParent();
10143 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
10144 int NameOpIdx = IsRead ? 1 : 0;
10145 int ValRegIndex = IsRead ? 0 : 1;
10147 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
10148 const LLT Ty = MRI.getType(ValReg);
10150 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
10157 (IsRead ?
"llvm.read_register" :
"llvm.write_register"),
10158 Fn,
MI.getDebugLoc()));
10162 MI.eraseFromParent();
10171 MI.eraseFromParent();
10177 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
10178 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10179 Register Result =
MI.getOperand(0).getReg();
10180 LLT OrigTy = MRI.getType(Result);
10184 auto LHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(1)});
10185 auto RHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(2)});
10187 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10189 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, SizeInBits);
10190 auto Shifted =
MIRBuilder.buildInstr(ShiftOp, {WideTy}, {
Mul, ShiftAmt});
10193 MI.eraseFromParent();
10199 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10204 MI.eraseFromParent();
10209 MI.eraseFromParent();
10216 unsigned BitSize = SrcTy.getScalarSizeInBits();
10220 auto AsInt = SrcTy == IntTy ?
MIRBuilder.buildCopy(IntTy, SrcReg)
10227 APInt ExpMask = Inf;
10229 APInt QNaNBitMask =
10233 auto SignBitC =
MIRBuilder.buildConstant(IntTy, SignBit);
10234 auto ValueMaskC =
MIRBuilder.buildConstant(IntTy, ValueMask);
10235 auto InfC =
MIRBuilder.buildConstant(IntTy, Inf);
10236 auto ExpMaskC =
MIRBuilder.buildConstant(IntTy, ExpMask);
10237 auto ZeroC =
MIRBuilder.buildConstant(IntTy, 0);
10239 auto Abs =
MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10243 auto Res =
MIRBuilder.buildConstant(DstTy, 0);
10245 LLT DstTyCopy = DstTy;
10247 Res =
MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10275 auto ExpBits =
MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10278 Mask &= ~PartialCheck;
10287 else if (PartialCheck ==
fcZero)
10299 auto OneC =
MIRBuilder.buildConstant(IntTy, 1);
10300 auto VMinusOne =
MIRBuilder.buildSub(IntTy, V, OneC);
10301 auto SubnormalRes =
10303 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10305 SubnormalRes =
MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10306 appendToRes(SubnormalRes);
10313 else if (PartialCheck ==
fcInf)
10318 auto NegInfC =
MIRBuilder.buildConstant(IntTy, NegInf);
10325 auto InfWithQnanBitC =
MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10326 if (PartialCheck ==
fcNan) {
10330 }
else if (PartialCheck ==
fcQNan) {
10340 Abs, InfWithQnanBitC);
10341 appendToRes(
MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10348 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
10350 IntTy, Abs,
MIRBuilder.buildConstant(IntTy, ExpLSB));
10351 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10354 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10356 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10359 DstTy, Sign,
MIRBuilder.buildConstant(DstTy, InversionMask));
10360 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10362 appendToRes(NormalRes);
10366 MI.eraseFromParent();
10372 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10373 MI.getFirst4RegLLTs();
10382 Op1Reg =
MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10383 Op1Ty = MRI.getType(Op1Reg);
10384 Op2Reg =
MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10385 Op2Ty = MRI.getType(Op2Reg);
10389 if (MaskTy.isScalar()) {
10397 MaskElt =
MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10400 MaskTy = DstTy.changeElementType(
LLT::integer(DstTy.getScalarSizeInBits()));
10402 MIRBuilder.buildSExtOrTrunc(MaskTy.getScalarType(), MaskElt).getReg(0);
10404 if (DstTy.isVector()) {
10406 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MaskTy, MaskElt);
10407 MaskReg = ShufSplat.getReg(0);
10411 }
else if (!DstTy.isVector()) {
10416 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10420 if (!Op1Ty.getScalarType().isAnyScalar() &&
10421 !Op1Ty.getScalarType().isInteger())
10422 Op1Reg =
MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);
10424 if (!Op2Ty.getScalarType().isAnyScalar() &&
10425 !Op2Ty.getScalarType().isInteger()) {
10427 Op2Ty.changeElementType(
LLT::integer(Op2Ty.getScalarSizeInBits()));
10428 Op2Reg =
MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
10431 auto NotMask =
MIRBuilder.buildNot(MaskTy, MaskReg);
10432 auto NewOp1 =
MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10433 auto NewOp2 =
MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10438 if (DstTy == Op1TyInt)
10441 auto Or =
MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
10445 MI.eraseFromParent();
10451 unsigned Opcode =
MI.getOpcode();
10454 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10455 : TargetOpcode::G_UDIV,
10456 {
MI.getOperand(0).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10458 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10459 : TargetOpcode::G_UREM,
10460 {
MI.getOperand(1).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10461 MI.eraseFromParent();
10471 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
10475 auto Shift =
MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10478 MI.eraseFromParent();
10488 Register SrcReg =
MI.getOperand(1).getReg();
10489 LLT Ty = MRI.getType(SrcReg);
10490 auto Zero =
MIRBuilder.buildConstant(Ty, 0);
10493 MI.eraseFromParent();
10499 Register SrcReg =
MI.getOperand(1).getReg();
10500 Register DestReg =
MI.getOperand(0).getReg();
10502 auto Zero =
MIRBuilder.buildConstant(Ty, 0).getReg(0);
10503 auto Sub =
MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10506 MI.eraseFromParent();
10512 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10513 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10514 "Expected G_ABDS or G_ABDU instruction");
10516 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10517 LLT Ty = MRI.getType(LHS);
10527 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10529 MI.eraseFromParent();
10535 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10536 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10537 "Expected G_ABDS or G_ABDU instruction");
10539 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10540 LLT Ty = MRI.getType(LHS);
10545 if (
MI.getOpcode() == TargetOpcode::G_ABDS) {
10546 MaxReg =
MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10547 MinReg =
MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10549 MaxReg =
MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10550 MinReg =
MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10552 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10554 MI.eraseFromParent();
10559 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10564 if (!(SrcTy.getScalarType().isAnyScalar() ||
10565 SrcTy.getScalarType().isInteger())) {
10567 SrcTy.changeElementType(
LLT::integer(SrcTy.getScalarSizeInBits()));
10568 CastedSrc =
MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
10571 if (MRI.getType(DstReg) != TyInt) {
10575 .buildAnd(TyInt, CastedSrc,
10578 DstTy.getScalarSizeInBits())))
10590 MI.eraseFromParent();
10596 Register SrcReg =
MI.getOperand(1).getReg();
10597 LLT SrcTy = MRI.getType(SrcReg);
10598 LLT DstTy = MRI.getType(SrcReg);
10601 if (SrcTy.isScalar()) {
10606 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::COPY));
10617 Register ListPtr =
MI.getOperand(1).getReg();
10618 LLT PtrTy = MRI.getType(ListPtr);
10625 auto VAList =
MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10627 const Align A(
MI.getOperand(2).getImm());
10629 if (
A > TLI.getMinStackArgumentAlignment()) {
10631 MIRBuilder.buildConstant(PtrTyAsScalarTy,
A.value() - 1).getReg(0);
10632 auto AddDst =
MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10633 auto AndDst =
MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst,
Log2(
A));
10634 VAList = AndDst.getReg(0);
10641 LLT LLTTy = MRI.getType(Dst);
10644 MIRBuilder.buildConstant(PtrTyAsScalarTy,
DL.getTypeAllocSize(Ty));
10645 auto Succ =
MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10650 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10652 Align EltAlignment =
DL.getABITypeAlign(Ty);
10655 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10657 MI.eraseFromParent();
10662 [[maybe_unused]]
unsigned OpCode =
MI.getOpcode();
10663 assert((OpCode == TargetOpcode::G_SMULFIX ||
10664 OpCode == TargetOpcode::G_UMULFIX) &&
10665 "Operator must be either G_SMULFIX or G_UMULFIX!");
10666 auto [Dst, LHS, RHS] =
MI.getFirst3Regs();
10667 LLT Ty = MRI.getType(Dst);
10668 unsigned Scale =
MI.getOperand(3).getImm();
10672 MI.eraseFromParent();
10678 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, Scale);
10680 if (
MI.getOpcode() == TargetOpcode::G_SMULFIX) {
10689 if (
MI.getOpcode() == TargetOpcode::G_SMULFIX)
10696 MI.eraseFromParent();
10711 unsigned Limit,
const MemOp &
Op,
10712 unsigned DstAS,
unsigned SrcAS,
10713 const AttributeList &FuncAttributes,
10715 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
10725 if (
Op.isFixedDstAlign())
10726 while (
Op.getDstAlign() < Ty.getSizeInBytes() &&
10729 assert(Ty.getSizeInBits() > 0 &&
"Could not find valid type");
10733 unsigned NumMemOps = 0;
10736 unsigned TySize = Ty.getSizeInBytes();
10737 while (TySize >
Size) {
10747 assert(NewTySize > 0 &&
"Could not find appropriate type");
10754 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
10756 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
10762 TySize = NewTySize;
10766 if (++NumMemOps > Limit)
10769 MemOps.push_back(Ty);
10779 unsigned NumBits = Ty.getScalarSizeInBits();
10781 if (!Ty.isVector() && ValVRegAndVal) {
10782 APInt Scalar = ValVRegAndVal->Value.
trunc(8);
10790 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10811 uint64_t KnownLen,
Align Alignment,
10813 auto &MF = *
MI.getParent()->getParent();
10818 assert(KnownLen != 0 &&
"Have a zero length memset length!");
10820 bool DstAlignCanChange =
false;
10824 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10826 DstAlignCanChange =
true;
10828 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10829 std::vector<LLT> MemOps;
10831 const auto &DstMMO = **
MI.memoperands_begin();
10832 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10835 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10846 if (DstAlignCanChange) {
10849 Align NewAlign =
DL.getABITypeAlign(IRTy);
10850 if (NewAlign > Alignment) {
10851 Alignment = NewAlign;
10859 MachineIRBuilder MIB(
MI);
10861 LLT LargestTy = MemOps[0];
10862 for (
unsigned i = 1; i < MemOps.size(); i++)
10864 LargestTy = MemOps[i];
10876 LLT PtrTy = MRI.getType(Dst);
10877 unsigned DstOff = 0;
10878 unsigned Size = KnownLen;
10879 for (
unsigned I = 0;
I < MemOps.size();
I++) {
10880 LLT Ty = MemOps[
I];
10882 if (TySize >
Size) {
10885 assert(
I == MemOps.size() - 1 &&
I != 0);
10886 DstOff -= TySize -
Size;
10896 TLI.isTruncateFree(LargestVT, VT))
10897 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10910 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst,
Offset).getReg(0);
10913 MIB.buildStore(
Value, Ptr, *StoreMMO);
10918 MI.eraseFromParent();
10924 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10926 auto [Dst, Src, Len] =
MI.getFirst3Regs();
10928 const auto *MMOIt =
MI.memoperands_begin();
10930 bool IsVolatile =
MemOp->isVolatile();
10936 "inline memcpy with dynamic size is not yet supported");
10937 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10938 if (KnownLen == 0) {
10939 MI.eraseFromParent();
10943 const auto &DstMMO = **
MI.memoperands_begin();
10944 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10945 Align DstAlign = DstMMO.getBaseAlign();
10946 Align SrcAlign = SrcMMO.getBaseAlign();
10948 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10955 Align SrcAlign,
bool IsVolatile) {
10956 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10957 return lowerMemcpy(
MI, Dst, Src, KnownLen,
10958 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10965 Align SrcAlign,
bool IsVolatile) {
10966 auto &MF = *
MI.getParent()->getParent();
10971 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
10973 bool DstAlignCanChange =
false;
10975 Align Alignment = std::min(DstAlign, SrcAlign);
10979 DstAlignCanChange =
true;
10985 std::vector<LLT> MemOps;
10987 const auto &DstMMO = **
MI.memoperands_begin();
10988 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10994 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
11000 if (DstAlignCanChange) {
11003 Align NewAlign =
DL.getABITypeAlign(IRTy);
11008 if (!
TRI->hasStackRealignment(MF))
11010 NewAlign = std::min(NewAlign, *StackAlign);
11012 if (NewAlign > Alignment) {
11013 Alignment = NewAlign;
11021 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
11023 MachineIRBuilder MIB(
MI);
11029 unsigned CurrOffset = 0;
11030 unsigned Size = KnownLen;
11031 for (
auto CopyTy : MemOps) {
11034 if (CopyTy.getSizeInBytes() >
Size)
11035 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
11046 if (CurrOffset != 0) {
11047 LLT SrcTy = MRI.getType(Src);
11051 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11053 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
11057 if (CurrOffset != 0) {
11058 LLT DstTy = MRI.getType(Dst);
11059 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11061 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
11062 CurrOffset += CopyTy.getSizeInBytes();
11063 Size -= CopyTy.getSizeInBytes();
11066 MI.eraseFromParent();
11072 uint64_t KnownLen,
Align DstAlign,
Align SrcAlign,
11074 auto &MF = *
MI.getParent()->getParent();
11079 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
11081 bool DstAlignCanChange =
false;
11084 Align Alignment = std::min(DstAlign, SrcAlign);
11086 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
11088 DstAlignCanChange =
true;
11090 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
11091 std::vector<LLT> MemOps;
11093 const auto &DstMMO = **
MI.memoperands_begin();
11094 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
11095 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
11096 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
11103 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
11109 if (DstAlignCanChange) {
11112 Align NewAlign =
DL.getABITypeAlign(IRTy);
11117 if (!
TRI->hasStackRealignment(MF))
11118 if (MaybeAlign StackAlign =
DL.getStackAlignment())
11119 NewAlign = std::min(NewAlign, *StackAlign);
11121 if (NewAlign > Alignment) {
11122 Alignment = NewAlign;
11130 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
11132 MachineIRBuilder MIB(
MI);
11136 unsigned CurrOffset = 0;
11137 SmallVector<Register, 16> LoadVals;
11138 for (
auto CopyTy : MemOps) {
11145 if (CurrOffset != 0) {
11146 LLT SrcTy = MRI.getType(Src);
11149 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11151 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
11152 CurrOffset += CopyTy.getSizeInBytes();
11156 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
11157 LLT CopyTy = MemOps[
I];
11163 if (CurrOffset != 0) {
11164 LLT DstTy = MRI.getType(Dst);
11167 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11169 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
11172 MI.eraseFromParent();
11178 const unsigned Opc =
MI.getOpcode();
11181 assert((
Opc == TargetOpcode::G_MEMCPY ||
Opc == TargetOpcode::G_MEMMOVE ||
11182 Opc == TargetOpcode::G_MEMSET) &&
11183 "Expected memcpy like instruction");
11185 auto MMOIt =
MI.memoperands_begin();
11190 auto [Dst, Src, Len] =
MI.getFirst3Regs();
11192 if (
Opc != TargetOpcode::G_MEMSET) {
11193 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
11194 MemOp = *(++MMOIt);
11195 SrcAlign =
MemOp->getBaseAlign();
11200 if (!LenVRegAndVal)
11202 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
11204 if (KnownLen == 0) {
11205 MI.eraseFromParent();
11209 if (MaxLen && KnownLen > MaxLen)
11212 bool IsVolatile =
MemOp->isVolatile();
11213 if (
Opc == TargetOpcode::G_MEMCPY) {
11214 auto &MF = *
MI.getParent()->getParent();
11217 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
11218 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
11221 if (
Opc == TargetOpcode::G_MEMMOVE)
11222 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
11223 if (
Opc == TargetOpcode::G_MEMSET)
11224 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getValueReg() const
Get the stored value register.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e. number of whole bytes needed to represent the size in bits.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's type to WideTy using the specified ExtOpcode for the extension instruction.
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the added elements later.
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F32_TO_BF16(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar type as the original but with fewer elements.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMulfix(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
Represent a constant reference to a string, i.e.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target machine can natively recognize.
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
bool isa(const From &Val)
isa&lt;X&gt; - Return true if the parameter to the template is an instance of one of the template type arguments.
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed, and the number of bits contained in each block).
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
@ Custom
The result value requires a custom uniformity check.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.