43#define DEBUG_TYPE "legalizer"
56static std::pair<int, int>
62 unsigned NumParts =
Size / NarrowSize;
63 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
66 if (LeftoverSize == 0)
71 if (LeftoverSize % EltSize != 0)
80 return std::make_pair(NumParts, NumLeftover);
88 switch (Ty.getSizeInBits()) {
129 auto Step = LI.getAction(
MI, MRI);
130 switch (Step.Action) {
145 return bitcast(
MI, Step.TypeIdx, Step.NewType);
148 return lower(
MI, Step.TypeIdx, Step.NewType);
157 return LI.legalizeCustom(*
this,
MI, LocObserver) ?
Legalized
165void LegalizerHelper::insertParts(
Register DstReg,
187 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
189 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
203 LLT Ty = MRI.getType(
Reg);
211void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
214 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
215 appendVectorElts(AllElts, PartRegs[i]);
218 if (!MRI.getType(Leftover).isVector())
221 appendVectorElts(AllElts, Leftover);
223 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
229 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
231 const int StartIdx = Regs.
size();
232 const int NumResults =
MI.getNumOperands() - 1;
234 for (
int I = 0;
I != NumResults; ++
I)
235 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
247 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
254 LLT SrcTy = MRI.getType(SrcReg);
256 extractGCDType(Parts, GCDTy, SrcReg);
260LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
262 unsigned PadStrategy) {
267 int NumOrigSrc = VRegs.
size();
273 if (NumOrigSrc < NumParts * NumSubParts) {
274 if (PadStrategy == TargetOpcode::G_ZEXT)
275 PadReg =
MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
276 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 PadReg =
MIRBuilder.buildUndef(GCDTy).getReg(0);
279 assert(PadStrategy == TargetOpcode::G_SEXT);
284 PadReg =
MIRBuilder.buildAShr(GCDTy, VRegs.
back(), ShiftAmt).getReg(0);
300 for (
int I = 0;
I != NumParts; ++
I) {
301 bool AllMergePartsArePadding =
true;
304 for (
int J = 0; J != NumSubParts; ++J) {
305 int Idx =
I * NumSubParts + J;
306 if (Idx >= NumOrigSrc) {
307 SubMerge[J] = PadReg;
311 SubMerge[J] = VRegs[Idx];
314 AllMergePartsArePadding =
false;
320 if (AllMergePartsArePadding && !AllPadReg) {
321 if (PadStrategy == TargetOpcode::G_ANYEXT)
322 AllPadReg =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
323 else if (PadStrategy == TargetOpcode::G_ZEXT)
324 AllPadReg =
MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
333 Remerge[
I] = AllPadReg;
337 if (NumSubParts == 1)
338 Remerge[
I] = SubMerge[0];
340 Remerge[
I] =
MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
343 if (AllMergePartsArePadding && !AllPadReg)
344 AllPadReg = Remerge[
I];
347 VRegs = std::move(Remerge);
351void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
353 LLT DstTy = MRI.getType(DstReg);
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
363 auto Remerge =
MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
372 UnmergeDefs[0] = DstReg;
373 for (
unsigned I = 1;
I != NumDefs; ++
I)
374 UnmergeDefs[
I] = MRI.createGenericVirtualRegister(DstTy);
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
385#define RTLIBCASE_INT(LibcallPrefix) \
389 return RTLIB::LibcallPrefix##32; \
391 return RTLIB::LibcallPrefix##64; \
393 return RTLIB::LibcallPrefix##128; \
395 llvm_unreachable("unexpected size"); \
399#define RTLIBCASE(LibcallPrefix) \
403 return RTLIB::LibcallPrefix##32; \
405 return RTLIB::LibcallPrefix##64; \
407 return RTLIB::LibcallPrefix##80; \
409 return RTLIB::LibcallPrefix##128; \
411 llvm_unreachable("unexpected size"); \
416 case TargetOpcode::G_LROUND:
418 case TargetOpcode::G_LLROUND:
420 case TargetOpcode::G_MUL:
422 case TargetOpcode::G_SDIV:
424 case TargetOpcode::G_UDIV:
426 case TargetOpcode::G_SREM:
428 case TargetOpcode::G_UREM:
430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
432 case TargetOpcode::G_FADD:
434 case TargetOpcode::G_FSUB:
436 case TargetOpcode::G_FMUL:
438 case TargetOpcode::G_FDIV:
440 case TargetOpcode::G_FEXP:
442 case TargetOpcode::G_FEXP2:
444 case TargetOpcode::G_FEXP10:
446 case TargetOpcode::G_FREM:
448 case TargetOpcode::G_FPOW:
450 case TargetOpcode::G_FPOWI:
452 case TargetOpcode::G_FMA:
454 case TargetOpcode::G_FSIN:
456 case TargetOpcode::G_FCOS:
458 case TargetOpcode::G_FTAN:
460 case TargetOpcode::G_FASIN:
462 case TargetOpcode::G_FACOS:
464 case TargetOpcode::G_FATAN:
466 case TargetOpcode::G_FATAN2:
468 case TargetOpcode::G_FSINH:
470 case TargetOpcode::G_FCOSH:
472 case TargetOpcode::G_FTANH:
474 case TargetOpcode::G_FSINCOS:
476 case TargetOpcode::G_FMODF:
478 case TargetOpcode::G_FLOG10:
480 case TargetOpcode::G_FLOG:
482 case TargetOpcode::G_FLOG2:
484 case TargetOpcode::G_FLDEXP:
486 case TargetOpcode::G_FCEIL:
488 case TargetOpcode::G_FFLOOR:
490 case TargetOpcode::G_FMINNUM:
492 case TargetOpcode::G_FMAXNUM:
494 case TargetOpcode::G_FMINIMUMNUM:
496 case TargetOpcode::G_FMAXIMUMNUM:
498 case TargetOpcode::G_FSQRT:
500 case TargetOpcode::G_FRINT:
502 case TargetOpcode::G_FNEARBYINT:
504 case TargetOpcode::G_INTRINSIC_TRUNC:
506 case TargetOpcode::G_INTRINSIC_ROUND:
508 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
510 case TargetOpcode::G_INTRINSIC_LRINT:
512 case TargetOpcode::G_INTRINSIC_LLRINT:
532 AttributeList CallerAttrs =
F.getAttributes();
533 if (AttrBuilder(
F.getContext(), CallerAttrs.getRetAttrs())
534 .removeAttribute(Attribute::NoAlias)
535 .removeAttribute(Attribute::NonNull)
540 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
541 CallerAttrs.hasRetAttr(Attribute::SExt))
552 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
559 if (!VReg.
isVirtual() || VReg !=
Next->getOperand(1).getReg())
567 if (Ret ==
MBB.instr_end() || !Ret->isReturn())
570 if (Ret->getNumImplicitOperands() != 1)
573 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
590 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
595 Info.OrigRet = Result;
598 (Result.Ty->isVoidTy() ||
599 Result.Ty ==
MIRBuilder.getMF().getFunction().getReturnType()) &&
607 if (
MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
618 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
622 Next->eraseFromParent();
623 }
while (
MI->getNextNode());
638 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(
Libcall);
639 if (LibcallImpl == RTLIB::Unsupported)
643 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
657 Args.push_back({MO.getReg(), OpType, 0});
676 unsigned AddrSpace =
DL.getAllocaAddrSpace();
694 if (LibcallResult != LegalizeResult::Legalized)
702 MIRBuilder.
buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
703 MIRBuilder.
buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
704 MI.eraseFromParent();
719 LLT DstTy = MRI.getType(DstFrac);
724 unsigned AddrSpace =
DL.getAllocaAddrSpace();
725 MachinePointerInfo PtrInfo;
734 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
737 if (LibcallResult != LegalizeResult::Legalized)
743 MIRBuilder.
buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
744 MI.eraseFromParent();
755 case TargetOpcode::G_FPEXT:
757 case TargetOpcode::G_FPTRUNC:
759 case TargetOpcode::G_FPTOSI:
761 case TargetOpcode::G_FPTOUI:
763 case TargetOpcode::G_SITOFP:
765 case TargetOpcode::G_UITOFP:
775 if (FromType->isIntegerTy()) {
776 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
777 Arg.
Flags[0].setSExt();
779 Arg.
Flags[0].setZExt();
790 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
794 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
798 LLT OpLLT = MRI.getType(Reg);
799 Type *OpTy =
nullptr;
804 Args.push_back({Reg, OpTy, 0});
807 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
808 RTLIB::Libcall RTLibcall;
809 unsigned Opc =
MI.getOpcode();
811 case TargetOpcode::G_BZERO:
812 RTLibcall = RTLIB::BZERO;
814 case TargetOpcode::G_MEMCPY:
815 RTLibcall = RTLIB::MEMCPY;
816 Args[0].Flags[0].setReturned();
818 case TargetOpcode::G_MEMMOVE:
819 RTLibcall = RTLIB::MEMMOVE;
820 Args[0].Flags[0].setReturned();
822 case TargetOpcode::G_MEMSET:
823 RTLibcall = RTLIB::MEMSET;
824 Args[0].Flags[0].setReturned();
833 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
836 if (RTLibcallImpl == RTLIB::Unsupported) {
843 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
850 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
857 if (Info.LoweredTailCall) {
858 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
868 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
869 "Expected instr following MI to be return or debug inst?");
872 Next->eraseFromParent();
873 }
while (
MI.getNextNode());
883 unsigned Opc =
MI.getOpcode();
885 auto &MMO = AtomicMI.getMMO();
886 auto Ordering = MMO.getMergedOrdering();
887 LLT MemType = MMO.getMemoryType();
890 return RTLIB::UNKNOWN_LIBCALL;
892#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
894 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
899 return getOutlineAtomicHelper(LC, Ordering, MemSize);
901 case TargetOpcode::G_ATOMICRMW_XCHG: {
902 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
903 return getOutlineAtomicHelper(LC, Ordering, MemSize);
905 case TargetOpcode::G_ATOMICRMW_ADD:
906 case TargetOpcode::G_ATOMICRMW_SUB: {
907 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
908 return getOutlineAtomicHelper(LC, Ordering, MemSize);
910 case TargetOpcode::G_ATOMICRMW_AND: {
911 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
912 return getOutlineAtomicHelper(LC, Ordering, MemSize);
914 case TargetOpcode::G_ATOMICRMW_OR: {
915 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
916 return getOutlineAtomicHelper(LC, Ordering, MemSize);
918 case TargetOpcode::G_ATOMICRMW_XOR: {
919 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
920 return getOutlineAtomicHelper(LC, Ordering, MemSize);
923 return RTLIB::UNKNOWN_LIBCALL;
936 unsigned Opc =
MI.getOpcode();
938 case TargetOpcode::G_ATOMIC_CMPXCHG:
939 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
942 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
943 MI.getFirst4RegLLTs();
946 if (
Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
947 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
948 NewLLT) =
MI.getFirst5RegLLTs();
958 case TargetOpcode::G_ATOMICRMW_XCHG:
959 case TargetOpcode::G_ATOMICRMW_ADD:
960 case TargetOpcode::G_ATOMICRMW_SUB:
961 case TargetOpcode::G_ATOMICRMW_AND:
962 case TargetOpcode::G_ATOMICRMW_OR:
963 case TargetOpcode::G_ATOMICRMW_XOR: {
964 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
967 if (
Opc == TargetOpcode::G_ATOMICRMW_AND)
971 else if (
Opc == TargetOpcode::G_ATOMICRMW_SUB)
986 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
988 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
991 if (RTLibcallImpl == RTLIB::Unsupported) {
998 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1012static RTLIB::Libcall
1014 RTLIB::Libcall RTLibcall;
1015 switch (
MI.getOpcode()) {
1016 case TargetOpcode::G_GET_FPENV:
1017 RTLibcall = RTLIB::FEGETENV;
1019 case TargetOpcode::G_SET_FPENV:
1020 case TargetOpcode::G_RESET_FPENV:
1021 RTLibcall = RTLIB::FESETENV;
1023 case TargetOpcode::G_GET_FPMODE:
1024 RTLibcall = RTLIB::FEGETMODE;
1026 case TargetOpcode::G_SET_FPMODE:
1027 case TargetOpcode::G_RESET_FPMODE:
1028 RTLibcall = RTLIB::FESETMODE;
1060 LLT StateTy = MRI.getType(Dst);
1063 MachinePointerInfo TempPtrInfo;
1067 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1072 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1080 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1098 LLT StateTy = MRI.getType(Src);
1101 MachinePointerInfo TempPtrInfo;
1110 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver,
nullptr);
1122static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1124#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1128 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1130 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1132 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1134 llvm_unreachable("unexpected size"); \
1165 LLT OpLLT = MRI.getType(
Cmp->getLHSReg());
1168 OpLLT != MRI.getType(
Cmp->getRHSReg()))
1175 LLT DstTy = MRI.getType(DstReg);
1176 const auto Cond =
Cmp->getCond();
1181 const auto BuildLibcall = [&](
const RTLIB::Libcall
Libcall,
1186 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1190 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1197 .buildICmp(ICmpPred, Res, Temp,
MIRBuilder.buildConstant(TempLLT, 0))
1203 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1205 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1218 const auto [OeqLibcall, OeqPred] =
1220 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1222 const auto [UnoLibcall, UnoPred] =
1224 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1239 const auto [OeqLibcall, OeqPred] =
1244 const auto [UnoLibcall, UnoPred] =
1249 if (NotOeq && NotUno)
1268 const auto [InversedLibcall, InversedPred] =
1270 if (!BuildLibcall(InversedLibcall,
1295 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1297 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &
MI);
1311 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
1313 switch (
MI.getOpcode()) {
1316 case TargetOpcode::G_MUL:
1317 case TargetOpcode::G_SDIV:
1318 case TargetOpcode::G_UDIV:
1319 case TargetOpcode::G_SREM:
1320 case TargetOpcode::G_UREM:
1321 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1322 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1330 case TargetOpcode::G_FADD:
1331 case TargetOpcode::G_FSUB:
1332 case TargetOpcode::G_FMUL:
1333 case TargetOpcode::G_FDIV:
1334 case TargetOpcode::G_FMA:
1335 case TargetOpcode::G_FPOW:
1336 case TargetOpcode::G_FREM:
1337 case TargetOpcode::G_FCOS:
1338 case TargetOpcode::G_FSIN:
1339 case TargetOpcode::G_FTAN:
1340 case TargetOpcode::G_FACOS:
1341 case TargetOpcode::G_FASIN:
1342 case TargetOpcode::G_FATAN:
1343 case TargetOpcode::G_FATAN2:
1344 case TargetOpcode::G_FCOSH:
1345 case TargetOpcode::G_FSINH:
1346 case TargetOpcode::G_FTANH:
1347 case TargetOpcode::G_FLOG10:
1348 case TargetOpcode::G_FLOG:
1349 case TargetOpcode::G_FLOG2:
1350 case TargetOpcode::G_FEXP:
1351 case TargetOpcode::G_FEXP2:
1352 case TargetOpcode::G_FEXP10:
1353 case TargetOpcode::G_FCEIL:
1354 case TargetOpcode::G_FFLOOR:
1355 case TargetOpcode::G_FMINNUM:
1356 case TargetOpcode::G_FMAXNUM:
1357 case TargetOpcode::G_FMINIMUMNUM:
1358 case TargetOpcode::G_FMAXIMUMNUM:
1359 case TargetOpcode::G_FSQRT:
1360 case TargetOpcode::G_FRINT:
1361 case TargetOpcode::G_FNEARBYINT:
1362 case TargetOpcode::G_INTRINSIC_TRUNC:
1363 case TargetOpcode::G_INTRINSIC_ROUND:
1364 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1365 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1369 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1377 case TargetOpcode::G_FSINCOS: {
1378 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1382 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1387 case TargetOpcode::G_FMODF: {
1388 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1392 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1397 case TargetOpcode::G_LROUND:
1398 case TargetOpcode::G_LLROUND:
1399 case TargetOpcode::G_INTRINSIC_LRINT:
1400 case TargetOpcode::G_INTRINSIC_LLRINT: {
1401 LLT LLTy = MRI.getType(
MI.getOperand(1).getReg());
1405 Ctx, MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits());
1407 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1413 {{
MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &
MI);
1416 MI.eraseFromParent();
1419 case TargetOpcode::G_FPOWI:
1420 case TargetOpcode::G_FLDEXP: {
1421 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1425 Ctx, MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
1427 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1432 {
MI.getOperand(1).getReg(), HLTy, 0},
1433 {
MI.getOperand(2).getReg(), ITy, 1}};
1434 Args[1].Flags[0].setSExt();
1436 Libcall, {
MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &
MI);
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1445 if (!FromTy || !ToTy)
1452 case TargetOpcode::G_FCMP: {
1456 MI.eraseFromParent();
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1464 unsigned ToSize = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1468 FromTy, LocObserver);
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1487 case TargetOpcode::G_ATOMICRMW_XCHG:
1488 case TargetOpcode::G_ATOMICRMW_ADD:
1489 case TargetOpcode::G_ATOMICRMW_SUB:
1490 case TargetOpcode::G_ATOMICRMW_AND:
1491 case TargetOpcode::G_ATOMICRMW_OR:
1492 case TargetOpcode::G_ATOMICRMW_XOR:
1493 case TargetOpcode::G_ATOMIC_CMPXCHG:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1500 case TargetOpcode::G_BZERO:
1501 case TargetOpcode::G_MEMCPY:
1502 case TargetOpcode::G_MEMMOVE:
1503 case TargetOpcode::G_MEMSET: {
1508 MI.eraseFromParent();
1511 case TargetOpcode::G_GET_FPENV:
1512 case TargetOpcode::G_GET_FPMODE: {
1518 case TargetOpcode::G_SET_FPENV:
1519 case TargetOpcode::G_SET_FPMODE: {
1525 case TargetOpcode::G_RESET_FPENV:
1526 case TargetOpcode::G_RESET_FPMODE: {
1534 MI.eraseFromParent();
1541 uint64_t SizeOp0 = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1544 switch (
MI.getOpcode()) {
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1549 LLT DstTy = MRI.getType(DstReg);
1557 if (SizeOp0 % NarrowSize != 0) {
1562 MI.eraseFromParent();
1566 int NumParts = SizeOp0 / NarrowSize;
1569 for (
int i = 0; i < NumParts; ++i)
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1581 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1587 for (
int I = 0;
I != NumParts; ++
I) {
1588 unsigned Offset =
I * NarrowSize;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1597 if (LeftoverBits != 0) {
1601 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1605 insertParts(
MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1608 MI.eraseFromParent();
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1615 case TargetOpcode::G_TRUNC: {
1619 uint64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1621 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1625 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
1626 MIRBuilder.buildCopy(
MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1635 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1640 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1).getReg());
1642 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1644 MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1648 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1666 case TargetOpcode::G_EXTRACT:
1668 case TargetOpcode::G_INSERT:
1670 case TargetOpcode::G_LOAD: {
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1681 LoadMI.eraseFromParent();
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD: {
1690 Register DstReg = LoadMI.getDstReg();
1691 Register PtrReg = LoadMI.getPointerReg();
1693 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1694 auto &MMO = LoadMI.getMMO();
1697 if (MemSize == NarrowSize) {
1699 }
else if (MemSize < NarrowSize) {
1700 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1701 }
else if (MemSize > NarrowSize) {
1711 LoadMI.eraseFromParent();
1714 case TargetOpcode::G_STORE: {
1717 Register SrcReg = StoreMI.getValueReg();
1718 LLT SrcTy = MRI.getType(SrcReg);
1719 if (SrcTy.isVector())
1722 int NumParts = SizeOp0 / NarrowSize;
1724 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1725 if (SrcTy.isVector() && LeftoverBits != 0)
1728 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1729 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1731 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1732 StoreMI.eraseFromParent();
1738 case TargetOpcode::G_SELECT:
1740 case TargetOpcode::G_AND:
1741 case TargetOpcode::G_OR:
1742 case TargetOpcode::G_XOR: {
1754 case TargetOpcode::G_SHL:
1755 case TargetOpcode::G_LSHR:
1756 case TargetOpcode::G_ASHR:
1758 case TargetOpcode::G_CTLZ:
1759 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1760 case TargetOpcode::G_CTTZ:
1761 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1762 case TargetOpcode::G_CTLS:
1763 case TargetOpcode::G_CTPOP:
1765 switch (
MI.getOpcode()) {
1766 case TargetOpcode::G_CTLZ:
1767 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1769 case TargetOpcode::G_CTTZ:
1770 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1772 case TargetOpcode::G_CTPOP:
1774 case TargetOpcode::G_CTLS:
1784 case TargetOpcode::G_INTTOPTR:
1792 case TargetOpcode::G_PTRTOINT:
1800 case TargetOpcode::G_PHI: {
1803 if (SizeOp0 % NarrowSize != 0)
1806 unsigned NumParts = SizeOp0 / NarrowSize;
1810 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1818 for (
unsigned i = 0; i < NumParts; ++i) {
1819 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1821 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1822 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1823 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1826 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
1828 MI.eraseFromParent();
1831 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1832 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1836 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1842 case TargetOpcode::G_ICMP: {
1844 LLT SrcTy = MRI.getType(LHS);
1850 if (!
extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1856 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1857 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1863 LLT ResTy = MRI.getType(Dst);
1868 auto Zero =
MIRBuilder.buildConstant(NarrowTy, 0);
1870 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1871 auto LHS = std::get<0>(LHSAndRHS);
1872 auto RHS = std::get<1>(LHSAndRHS);
1873 auto Xor =
MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1880 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1881 auto LHS = std::get<0>(LHSAndRHS);
1882 auto RHS = std::get<1>(LHSAndRHS);
1883 auto Xor =
MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1884 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1885 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1886 TargetOpcode::G_ZEXT);
1893 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1894 auto Or =
MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1895 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1900 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1904 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1909 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1913 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[
I],
1916 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[
I],
1919 LHSPartRegs[
I], RHSPartRegs[
I]);
1920 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1926 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[
I],
1940 RHSLeftoverRegs[
I]);
1942 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[
I],
1943 RHSLeftoverRegs[
I]);
1946 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1947 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1953 MI.eraseFromParent();
1956 case TargetOpcode::G_FCMP:
1965 case TargetOpcode::G_SEXT_INREG: {
1969 int64_t SizeInBits =
MI.getOperand(2).getImm();
1978 auto TruncMIB =
MIRBuilder.buildTrunc(NarrowTy, MO1);
1979 MO1.
setReg(TruncMIB.getReg(0));
1982 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1994 if (SizeOp0 % NarrowSize != 0)
1996 int NumParts = SizeOp0 / NarrowSize;
2004 for (
int i = 0; i < NumParts; ++i) {
2005 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2020 for (
int i = 0; i < NumParts; ++i) {
2023 PartialExtensionReg = DstRegs.
back();
2025 assert(PartialExtensionReg &&
2026 "Expected to visit partial extension before full");
2027 if (FullExtensionReg) {
2032 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2034 FullExtensionReg = DstRegs.
back();
2039 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2042 PartialExtensionReg = DstRegs.
back();
2048 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2049 MI.eraseFromParent();
2052 case TargetOpcode::G_BSWAP:
2053 case TargetOpcode::G_BITREVERSE: {
2054 if (SizeOp0 % NarrowSize != 0)
2059 unsigned NumParts = SizeOp0 / NarrowSize;
2060 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2063 for (
unsigned i = 0; i < NumParts; ++i) {
2064 auto DstPart =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
2065 {SrcRegs[NumParts - 1 - i]});
2069 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
2072 MI.eraseFromParent();
2075 case TargetOpcode::G_PTR_ADD:
2076 case TargetOpcode::G_PTRMASK: {
2084 case TargetOpcode::G_FPTOUI:
2085 case TargetOpcode::G_FPTOSI:
2086 case TargetOpcode::G_FPTOUI_SAT:
2087 case TargetOpcode::G_FPTOSI_SAT:
2089 case TargetOpcode::G_FPEXT:
2096 case TargetOpcode::G_FLDEXP:
2097 case TargetOpcode::G_STRICT_FLDEXP:
2099 case TargetOpcode::G_VSCALE: {
2101 LLT Ty = MRI.getType(Dst);
2105 auto VScaleBase =
MIRBuilder.buildVScale(NarrowTy, One);
2106 auto ZExt =
MIRBuilder.buildZExt(Ty, VScaleBase);
2107 auto C =
MIRBuilder.buildConstant(Ty, *
MI.getOperand(1).getCImm());
2110 MI.eraseFromParent();
2117 LLT Ty = MRI.getType(Val);
2123 if (Ty.isPointer()) {
2124 if (
DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2126 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2132 if (Ty.isPointerVector())
2133 NewVal =
MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2134 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2138 unsigned OpIdx,
unsigned ExtOpcode) {
2140 auto ExtB =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2141 MO.
setReg(ExtB.getReg(0));
2147 auto ExtB =
MIRBuilder.buildTrunc(NarrowTy, MO);
2148 MO.
setReg(ExtB.getReg(0));
2152 unsigned OpIdx,
unsigned TruncOpcode) {
2154 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2156 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2161 unsigned OpIdx,
unsigned ExtOpcode) {
2163 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2165 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2174 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2176 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2182 MO.
setReg(
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2192 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2199LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2204 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
2205 if (DstTy.isVector())
2210 const int SrcSize = SrcTy.getSizeInBits();
2212 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2214 unsigned NumOps =
MI.getNumOperands();
2215 unsigned NumSrc =
MI.getNumOperands() - 1;
2216 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2218 if (WideSize >= DstSize) {
2222 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
2223 const unsigned Offset = (
I - 1) * PartSize;
2236 ResultReg = NextResult;
2239 if (WideSize > DstSize)
2241 else if (DstTy.isPointer())
2244 MI.eraseFromParent();
2269 const int GCD = std::gcd(SrcSize, WideSize);
2279 if (GCD == SrcSize) {
2282 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2283 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2289 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2291 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
2295 const int PartsPerGCD = WideSize / GCD;
2299 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2301 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2308 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2310 auto FinalMerge =
MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2311 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2314 MI.eraseFromParent();
2319LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2324 int NumDst =
MI.getNumOperands() - 1;
2325 Register SrcReg =
MI.getOperand(NumDst).getReg();
2326 LLT SrcTy = MRI.getType(SrcReg);
2330 Register Dst0Reg =
MI.getOperand(0).getReg();
2331 LLT DstTy = MRI.getType(Dst0Reg);
2340 dbgs() <<
"Not casting non-integral address space integer\n");
2345 SrcReg =
MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2353 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2361 for (
int I = 1;
I != NumDst; ++
I) {
2362 auto ShiftAmt =
MIRBuilder.buildConstant(SrcTy, DstSize *
I);
2363 auto Shr =
MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2367 MI.eraseFromParent();
2378 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2382 WideSrc =
MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2385 auto Unmerge =
MIRBuilder.buildUnmerge(WideTy, WideSrc);
2403 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2408 if (PartsPerRemerge == 1) {
2411 for (
int I = 0;
I != NumUnmerge; ++
I) {
2412 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2414 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2415 int Idx =
I * PartsPerUnmerge + J;
2417 MIB.addDef(
MI.getOperand(Idx).getReg());
2420 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2424 MIB.addUse(Unmerge.getReg(
I));
2427 SmallVector<Register, 16> Parts;
2428 for (
int J = 0; J != NumUnmerge; ++J)
2429 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2432 for (
int I = 0;
I != NumDst; ++
I) {
2433 for (
int J = 0; J < PartsPerRemerge; ++J) {
2434 const int Idx =
I * PartsPerRemerge + J;
2438 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(
I).getReg(), RemergeParts);
2439 RemergeParts.
clear();
2443 MI.eraseFromParent();
2448LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2450 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2451 unsigned Offset =
MI.getOperand(2).getImm();
2454 if (SrcTy.
isVector() || DstTy.isVector())
2466 Src =
MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2470 if (DstTy.isPointer())
2477 MI.eraseFromParent();
2482 LLT ShiftTy = SrcTy;
2491 MI.eraseFromParent();
2522LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2524 if (TypeIdx != 0 || WideTy.
isVector())
2534LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2538 std::optional<Register> CarryIn;
2539 switch (
MI.getOpcode()) {
2542 case TargetOpcode::G_SADDO:
2543 Opcode = TargetOpcode::G_ADD;
2544 ExtOpcode = TargetOpcode::G_SEXT;
2546 case TargetOpcode::G_SSUBO:
2547 Opcode = TargetOpcode::G_SUB;
2548 ExtOpcode = TargetOpcode::G_SEXT;
2550 case TargetOpcode::G_UADDO:
2551 Opcode = TargetOpcode::G_ADD;
2552 ExtOpcode = TargetOpcode::G_ZEXT;
2554 case TargetOpcode::G_USUBO:
2555 Opcode = TargetOpcode::G_SUB;
2556 ExtOpcode = TargetOpcode::G_ZEXT;
2558 case TargetOpcode::G_SADDE:
2559 Opcode = TargetOpcode::G_UADDE;
2560 ExtOpcode = TargetOpcode::G_SEXT;
2561 CarryIn =
MI.getOperand(4).getReg();
2563 case TargetOpcode::G_SSUBE:
2564 Opcode = TargetOpcode::G_USUBE;
2565 ExtOpcode = TargetOpcode::G_SEXT;
2566 CarryIn =
MI.getOperand(4).getReg();
2568 case TargetOpcode::G_UADDE:
2569 Opcode = TargetOpcode::G_UADDE;
2570 ExtOpcode = TargetOpcode::G_ZEXT;
2571 CarryIn =
MI.getOperand(4).getReg();
2573 case TargetOpcode::G_USUBE:
2574 Opcode = TargetOpcode::G_USUBE;
2575 ExtOpcode = TargetOpcode::G_ZEXT;
2576 CarryIn =
MI.getOperand(4).getReg();
2592 auto LHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(2)});
2593 auto RHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(3)});
2597 LLT CarryOutTy = MRI.getType(
MI.getOperand(1).getReg());
2599 .buildInstr(Opcode, {WideTy, CarryOutTy},
2600 {LHSExt, RHSExt, *CarryIn})
2603 NewOp =
MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).
getReg(0);
2605 LLT OrigTy = MRI.getType(
MI.getOperand(0).getReg());
2606 auto TruncOp =
MIRBuilder.buildTrunc(OrigTy, NewOp);
2607 auto ExtOp =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2612 MI.eraseFromParent();
2617LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2619 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2620 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2621 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2622 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2623 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2636 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2643 auto ShiftK =
MIRBuilder.buildConstant(WideTy, SHLAmount);
2647 auto WideInst =
MIRBuilder.buildInstr(
MI.getOpcode(), {WideTy},
2648 {ShiftL, ShiftR},
MI.getFlags());
2653 :
MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2656 MI.eraseFromParent();
2661LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2670 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2672 LLT SrcTy = MRI.getType(
LHS);
2673 LLT OverflowTy = MRI.getType(OriginalOverflow);
2680 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2681 auto LeftOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
LHS});
2682 auto RightOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
RHS});
2689 WideMulCanOverflow ?
MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2691 MachineInstrBuilder Mulo;
2692 if (WideMulCanOverflow)
2693 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2694 {LeftOperand, RightOperand});
2696 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2701 MachineInstrBuilder ExtResult;
2708 ExtResult =
MIRBuilder.buildSExtInReg(WideTy,
Mul, SrcBitWidth);
2712 ExtResult =
MIRBuilder.buildZExtInReg(WideTy,
Mul, SrcBitWidth);
2715 if (WideMulCanOverflow) {
2723 MI.eraseFromParent();
2729 unsigned Opcode =
MI.getOpcode();
2733 case TargetOpcode::G_ATOMICRMW_XCHG:
2734 case TargetOpcode::G_ATOMICRMW_ADD:
2735 case TargetOpcode::G_ATOMICRMW_SUB:
2736 case TargetOpcode::G_ATOMICRMW_AND:
2737 case TargetOpcode::G_ATOMICRMW_OR:
2738 case TargetOpcode::G_ATOMICRMW_XOR:
2739 case TargetOpcode::G_ATOMICRMW_MIN:
2740 case TargetOpcode::G_ATOMICRMW_MAX:
2741 case TargetOpcode::G_ATOMICRMW_UMIN:
2742 case TargetOpcode::G_ATOMICRMW_UMAX:
2743 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2749 case TargetOpcode::G_ATOMIC_CMPXCHG:
2750 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2757 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2767 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2772 case TargetOpcode::G_EXTRACT:
2773 return widenScalarExtract(
MI, TypeIdx, WideTy);
2774 case TargetOpcode::G_INSERT:
2775 return widenScalarInsert(
MI, TypeIdx, WideTy);
2776 case TargetOpcode::G_MERGE_VALUES:
2777 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2778 case TargetOpcode::G_UNMERGE_VALUES:
2779 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2780 case TargetOpcode::G_SADDO:
2781 case TargetOpcode::G_SSUBO:
2782 case TargetOpcode::G_UADDO:
2783 case TargetOpcode::G_USUBO:
2784 case TargetOpcode::G_SADDE:
2785 case TargetOpcode::G_SSUBE:
2786 case TargetOpcode::G_UADDE:
2787 case TargetOpcode::G_USUBE:
2788 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2789 case TargetOpcode::G_UMULO:
2790 case TargetOpcode::G_SMULO:
2791 return widenScalarMulo(
MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_SADDSAT:
2793 case TargetOpcode::G_SSUBSAT:
2794 case TargetOpcode::G_SSHLSAT:
2795 case TargetOpcode::G_UADDSAT:
2796 case TargetOpcode::G_USUBSAT:
2797 case TargetOpcode::G_USHLSAT:
2798 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_CTTZ:
2800 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2801 case TargetOpcode::G_CTLZ:
2802 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2803 case TargetOpcode::G_CTLS:
2804 case TargetOpcode::G_CTPOP: {
2817 case TargetOpcode::G_CTTZ:
2818 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2819 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2820 ExtOpc = TargetOpcode::G_ANYEXT;
2822 case TargetOpcode::G_CTLS:
2823 ExtOpc = TargetOpcode::G_SEXT;
2826 ExtOpc = TargetOpcode::G_ZEXT;
2829 auto MIBSrc =
MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2830 LLT CurTy = MRI.getType(SrcReg);
2831 unsigned NewOpc = Opcode;
2832 if (NewOpc == TargetOpcode::G_CTTZ) {
2839 WideTy, MIBSrc,
MIRBuilder.buildConstant(WideTy, TopBit));
2841 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2846 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2856 auto MIBNewOp =
MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2858 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2863 WideTy, MIBNewOp,
MIRBuilder.buildConstant(WideTy, SizeDiff),
2864 Opcode == TargetOpcode::G_CTLZ
2869 MIRBuilder.buildZExtOrTrunc(
MI.getOperand(0), MIBNewOp);
2870 MI.eraseFromParent();
2873 case TargetOpcode::G_BSWAP: {
2877 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2878 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2879 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2882 MI.getOperand(0).setReg(DstExt);
2886 LLT Ty = MRI.getType(DstReg);
2888 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2889 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2895 case TargetOpcode::G_BITREVERSE: {
2899 LLT Ty = MRI.getType(DstReg);
2902 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2904 MI.getOperand(0).setReg(DstExt);
2907 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, DiffBits);
2908 auto Shift =
MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2913 case TargetOpcode::G_FREEZE:
2914 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2921 case TargetOpcode::G_ABS:
2928 case TargetOpcode::G_ADD:
2929 case TargetOpcode::G_AND:
2930 case TargetOpcode::G_MUL:
2931 case TargetOpcode::G_OR:
2932 case TargetOpcode::G_XOR:
2933 case TargetOpcode::G_SUB:
2934 case TargetOpcode::G_SHUFFLE_VECTOR:
2945 case TargetOpcode::G_SBFX:
2946 case TargetOpcode::G_UBFX:
2960 case TargetOpcode::G_SHL:
2976 case TargetOpcode::G_ROTR:
2977 case TargetOpcode::G_ROTL:
2986 case TargetOpcode::G_SDIV:
2987 case TargetOpcode::G_SREM:
2988 case TargetOpcode::G_SMIN:
2989 case TargetOpcode::G_SMAX:
2990 case TargetOpcode::G_ABDS:
2998 case TargetOpcode::G_SDIVREM:
3008 case TargetOpcode::G_ASHR:
3009 case TargetOpcode::G_LSHR:
3013 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3014 : TargetOpcode::G_ZEXT;
3027 case TargetOpcode::G_UDIV:
3028 case TargetOpcode::G_UREM:
3029 case TargetOpcode::G_ABDU:
3036 case TargetOpcode::G_UDIVREM:
3045 case TargetOpcode::G_UMIN:
3046 case TargetOpcode::G_UMAX: {
3047 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3049 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3053 ? TargetOpcode::G_SEXT
3054 : TargetOpcode::G_ZEXT;
3064 case TargetOpcode::G_SELECT:
3074 bool IsVec = MRI.getType(
MI.getOperand(1).getReg()).isVector();
3081 case TargetOpcode::G_FPEXT:
3089 case TargetOpcode::G_FPTOSI:
3090 case TargetOpcode::G_FPTOUI:
3091 case TargetOpcode::G_INTRINSIC_LRINT:
3092 case TargetOpcode::G_INTRINSIC_LLRINT:
3093 case TargetOpcode::G_IS_FPCLASS:
3103 case TargetOpcode::G_SITOFP:
3113 case TargetOpcode::G_UITOFP:
3123 case TargetOpcode::G_FPTOSI_SAT:
3124 case TargetOpcode::G_FPTOUI_SAT:
3129 LLT Ty = MRI.getType(OldDst);
3130 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3132 MI.getOperand(0).setReg(ExtReg);
3133 uint64_t ShortBits = Ty.getScalarSizeInBits();
3136 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3147 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3148 NewDst =
MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3156 NewDst =
MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3164 case TargetOpcode::G_LOAD:
3165 case TargetOpcode::G_SEXTLOAD:
3166 case TargetOpcode::G_ZEXTLOAD:
3172 case TargetOpcode::G_STORE: {
3176 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3177 assert(!Ty.isPointerOrPointerVector() &&
"Can't widen type");
3178 if (!Ty.isScalar()) {
3186 MI.setMemRefs(MF, {NewMMO});
3193 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3194 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3200 case TargetOpcode::G_CONSTANT: {
3203 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3204 MRI.getType(
MI.getOperand(0).getReg()));
3205 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3206 ExtOpc == TargetOpcode::G_ANYEXT) &&
3209 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3213 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3219 case TargetOpcode::G_FCONSTANT: {
3225 auto IntCst =
MIRBuilder.buildConstant(
MI.getOperand(0).getReg(), Val);
3227 MI.eraseFromParent();
3230 case TargetOpcode::G_IMPLICIT_DEF: {
3236 case TargetOpcode::G_BRCOND:
3242 case TargetOpcode::G_FCMP:
3253 case TargetOpcode::G_ICMP:
3258 LLT SrcTy = MRI.getType(
MI.getOperand(2).getReg());
3262 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3263 unsigned ExtOpcode =
3267 ? TargetOpcode::G_SEXT
3268 : TargetOpcode::G_ZEXT;
3275 case TargetOpcode::G_PTR_ADD:
3276 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3282 case TargetOpcode::G_PHI: {
3283 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3286 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3298 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3301 LLT VecTy = MRI.getType(VecReg);
3307 TargetOpcode::G_ANYEXT);
3321 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3337 LLT VecTy = MRI.getType(VecReg);
3356 case TargetOpcode::G_FADD:
3357 case TargetOpcode::G_FMUL:
3358 case TargetOpcode::G_FSUB:
3359 case TargetOpcode::G_FMA:
3360 case TargetOpcode::G_FMAD:
3361 case TargetOpcode::G_FNEG:
3362 case TargetOpcode::G_FABS:
3363 case TargetOpcode::G_FCANONICALIZE:
3364 case TargetOpcode::G_FMINNUM:
3365 case TargetOpcode::G_FMAXNUM:
3366 case TargetOpcode::G_FMINNUM_IEEE:
3367 case TargetOpcode::G_FMAXNUM_IEEE:
3368 case TargetOpcode::G_FMINIMUM:
3369 case TargetOpcode::G_FMAXIMUM:
3370 case TargetOpcode::G_FMINIMUMNUM:
3371 case TargetOpcode::G_FMAXIMUMNUM:
3372 case TargetOpcode::G_FDIV:
3373 case TargetOpcode::G_FREM:
3374 case TargetOpcode::G_FCEIL:
3375 case TargetOpcode::G_FFLOOR:
3376 case TargetOpcode::G_FCOS:
3377 case TargetOpcode::G_FSIN:
3378 case TargetOpcode::G_FTAN:
3379 case TargetOpcode::G_FACOS:
3380 case TargetOpcode::G_FASIN:
3381 case TargetOpcode::G_FATAN:
3382 case TargetOpcode::G_FATAN2:
3383 case TargetOpcode::G_FCOSH:
3384 case TargetOpcode::G_FSINH:
3385 case TargetOpcode::G_FTANH:
3386 case TargetOpcode::G_FLOG10:
3387 case TargetOpcode::G_FLOG:
3388 case TargetOpcode::G_FLOG2:
3389 case TargetOpcode::G_FRINT:
3390 case TargetOpcode::G_FNEARBYINT:
3391 case TargetOpcode::G_FSQRT:
3392 case TargetOpcode::G_FEXP:
3393 case TargetOpcode::G_FEXP2:
3394 case TargetOpcode::G_FEXP10:
3395 case TargetOpcode::G_FPOW:
3396 case TargetOpcode::G_INTRINSIC_TRUNC:
3397 case TargetOpcode::G_INTRINSIC_ROUND:
3398 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3402 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3408 case TargetOpcode::G_FMODF: {
3418 case TargetOpcode::G_FPOWI:
3419 case TargetOpcode::G_FLDEXP:
3420 case TargetOpcode::G_STRICT_FLDEXP: {
3422 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3443 case TargetOpcode::G_FFREXP: {
3456 case TargetOpcode::G_LROUND:
3457 case TargetOpcode::G_LLROUND:
3468 case TargetOpcode::G_INTTOPTR:
3476 case TargetOpcode::G_PTRTOINT:
3484 case TargetOpcode::G_BUILD_VECTOR: {
3488 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3494 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3502 case TargetOpcode::G_SEXT_INREG:
3511 case TargetOpcode::G_PTRMASK: {
3519 case TargetOpcode::G_VECREDUCE_ADD: {
3528 case TargetOpcode::G_VECREDUCE_FADD:
3529 case TargetOpcode::G_VECREDUCE_FMUL:
3530 case TargetOpcode::G_VECREDUCE_FMIN:
3531 case TargetOpcode::G_VECREDUCE_FMAX:
3532 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3533 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3538 LLT VecTy = MRI.getType(VecReg);
3545 case TargetOpcode::G_VSCALE: {
3552 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3557 case TargetOpcode::G_SPLAT_VECTOR: {
3566 case TargetOpcode::G_INSERT_SUBVECTOR: {
3574 LLT SubVecTy = MRI.getType(SubVec);
3578 auto BigZExt =
MIRBuilder.buildZExt(WideTy, BigVec);
3579 auto SubZExt =
MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3580 auto WideInsert =
MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3584 auto SplatZero =
MIRBuilder.buildSplatVector(
3589 MI.eraseFromParent();
3598 auto Unmerge =
B.buildUnmerge(Ty, Src);
3599 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
3608 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3622 MIRBuilder.
buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3631 MI.eraseFromParent();
3642 MI.eraseFromParent();
3649 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3650 if (SrcTy.isVector()) {
3654 if (DstTy.isVector()) {
3655 int NumDstElt = DstTy.getNumElements();
3656 int NumSrcElt = SrcTy.getNumElements();
3659 LLT DstCastTy = DstEltTy;
3660 LLT SrcPartTy = SrcEltTy;
3664 if (NumSrcElt < NumDstElt) {
3675 SrcPartTy = SrcEltTy;
3676 }
else if (NumSrcElt > NumDstElt) {
3688 DstCastTy = DstEltTy;
3693 SrcReg =
MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3697 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3698 MI.eraseFromParent();
3702 if (DstTy.isVector()) {
3705 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3706 MI.eraseFromParent();
3722 unsigned NewEltSize,
3723 unsigned OldEltSize) {
3724 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3725 LLT IdxTy =
B.getMRI()->getType(Idx);
3728 auto OffsetMask =
B.buildConstant(
3730 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
3731 return B.buildShl(IdxTy, OffsetIdx,
3732 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3747 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] =
MI.getFirst3RegLLTs();
3751 unsigned OldNumElts = SrcVecTy.getNumElements();
3758 if (NewNumElts > OldNumElts) {
3769 if (NewNumElts % OldNumElts != 0)
3773 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3777 auto NewEltsPerOldEltK =
MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3780 auto NewBaseIdx =
MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3782 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3783 auto IdxOffset =
MIRBuilder.buildConstant(IdxTy,
I);
3784 auto TmpIdx =
MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3785 auto Elt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3786 NewOps[
I] = Elt.getReg(0);
3789 auto NewVec =
MIRBuilder.buildBuildVector(MidTy, NewOps);
3791 MI.eraseFromParent();
3795 if (NewNumElts < OldNumElts) {
3796 if (NewEltSize % OldEltSize != 0)
3818 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3819 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3822 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3826 WideElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3827 ScaledIdx).getReg(0);
3835 auto ExtractedBits =
MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3837 MI.eraseFromParent();
3851 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3852 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3853 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3854 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3857 auto EltMask =
B.buildConstant(
3861 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3862 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3865 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3869 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3883 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3884 MI.getFirst4RegLLTs();
3896 if (NewNumElts < OldNumElts) {
3897 if (NewEltSize % OldEltSize != 0)
3906 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3907 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3910 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3914 ExtractedElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3915 ScaledIdx).getReg(0);
3925 InsertedElt =
MIRBuilder.buildInsertVectorElement(
3926 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3930 MI.eraseFromParent();
3960 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3964 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3965 return UnableToLegalize;
3970 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3972 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3981 MI.eraseFromParent();
3999 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
4000 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4010 auto Inp1 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4011 auto Inp2 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4013 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4014 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4016 MI.eraseFromParent();
4046 LLT DstTy = MRI.getType(Dst);
4047 LLT SrcTy = MRI.getType(Src);
4053 if (DstTy == CastTy)
4061 if (CastEltSize < DstEltSize)
4064 auto AdjustAmt = CastEltSize / DstEltSize;
4065 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4066 SrcTyMinElts % AdjustAmt != 0)
4071 auto CastVec =
MIRBuilder.buildBitcast(SrcTy, Src);
4072 auto PromotedES =
MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4075 ES->eraseFromParent();
4110 LLT DstTy = MRI.getType(Dst);
4111 LLT BigVecTy = MRI.getType(BigVec);
4112 LLT SubVecTy = MRI.getType(SubVec);
4114 if (DstTy == CastTy)
4129 if (CastEltSize < DstEltSize)
4132 auto AdjustAmt = CastEltSize / DstEltSize;
4133 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4134 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4140 auto CastBigVec =
MIRBuilder.buildBitcast(BigVecTy, BigVec);
4141 auto CastSubVec =
MIRBuilder.buildBitcast(SubVecTy, SubVec);
4143 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4146 ES->eraseFromParent();
4154 LLT DstTy = MRI.getType(DstReg);
4162 if (MemSizeInBits != MemStoreSizeInBits) {
4179 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4183 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4184 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4186 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4189 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4191 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4194 if (DstTy != LoadTy)
4202 if (
MIRBuilder.getDataLayout().isBigEndian())
4220 uint64_t LargeSplitSize, SmallSplitSize;
4225 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4232 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4235 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4246 if (Alignment.
value() * 8 > MemSizeInBits &&
4251 auto NewLoad =
MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4268 LLT PtrTy = MRI.getType(PtrReg);
4271 auto LargeLoad =
MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4275 LargeSplitSize / 8);
4276 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4277 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4279 SmallPtr, *SmallMMO);
4281 auto ShiftAmt =
MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4282 auto Shift =
MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4284 if (AnyExtTy == DstTy)
4285 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4287 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4291 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4311 LLT SrcTy = MRI.getType(SrcReg);
4319 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4325 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4327 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4331 auto ZextInReg =
MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4335 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4350 uint64_t LargeSplitSize, SmallSplitSize;
4357 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4360 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4369 if (SrcTy.isPointer()) {
4371 SrcReg =
MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4374 auto ExtVal =
MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4377 auto ShiftAmt =
MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4378 auto SmallVal =
MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4381 LLT PtrTy = MRI.getType(PtrReg);
4384 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4390 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4391 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4400 LLT SrcTy = MRI.getType(SrcReg);
4406 assert(SrcTy.isVector() &&
"Expect a vector store type");
4413 auto CurrVal =
MIRBuilder.buildConstant(IntTy, 0);
4417 auto Elt =
MIRBuilder.buildExtractVectorElement(
4418 SrcTy.getElementType(), SrcReg,
MIRBuilder.buildConstant(IdxTy,
I));
4419 auto Trunc =
MIRBuilder.buildTrunc(MemScalarTy, Elt);
4420 auto ZExt =
MIRBuilder.buildZExt(IntTy, Trunc);
4426 auto Shifted =
MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4427 CurrVal =
MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4431 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4442 switch (
MI.getOpcode()) {
4443 case TargetOpcode::G_LOAD: {
4461 case TargetOpcode::G_STORE: {
4477 case TargetOpcode::G_SELECT: {
4481 if (MRI.getType(
MI.getOperand(1).getReg()).isVector()) {
4483 dbgs() <<
"bitcast action not implemented for vector select\n");
4494 case TargetOpcode::G_AND:
4495 case TargetOpcode::G_OR:
4496 case TargetOpcode::G_XOR: {
4504 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4506 case TargetOpcode::G_INSERT_VECTOR_ELT:
4508 case TargetOpcode::G_CONCAT_VECTORS:
4510 case TargetOpcode::G_SHUFFLE_VECTOR:
4512 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4514 case TargetOpcode::G_INSERT_SUBVECTOR:
4522void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4532 switch(
MI.getOpcode()) {
4535 case TargetOpcode::G_FCONSTANT:
4537 case TargetOpcode::G_BITCAST:
4539 case TargetOpcode::G_SREM:
4540 case TargetOpcode::G_UREM: {
4541 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4543 MIRBuilder.buildInstr(
MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4544 {MI.getOperand(1), MI.getOperand(2)});
4546 auto Prod =
MIRBuilder.buildMul(Ty, Quot,
MI.getOperand(2));
4548 MI.eraseFromParent();
4551 case TargetOpcode::G_SADDO:
4552 case TargetOpcode::G_SSUBO:
4554 case TargetOpcode::G_SADDE:
4556 case TargetOpcode::G_SSUBE:
4558 case TargetOpcode::G_UMULH:
4559 case TargetOpcode::G_SMULH:
4561 case TargetOpcode::G_SMULO:
4562 case TargetOpcode::G_UMULO: {
4565 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4566 LLT Ty = MRI.getType(Res);
4568 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4569 ? TargetOpcode::G_SMULH
4570 : TargetOpcode::G_UMULH;
4574 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4575 MI.removeOperand(1);
4578 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4586 if (Opcode == TargetOpcode::G_SMULH) {
4587 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4588 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4595 case TargetOpcode::G_FNEG: {
4596 auto [Res, SubByReg] =
MI.getFirst2Regs();
4597 LLT Ty = MRI.getType(Res);
4601 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4602 MI.eraseFromParent();
4605 case TargetOpcode::G_FSUB:
4606 case TargetOpcode::G_STRICT_FSUB: {
4607 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4608 LLT Ty = MRI.getType(Res);
4613 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4614 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4618 MI.eraseFromParent();
4621 case TargetOpcode::G_FMAD:
4623 case TargetOpcode::G_FFLOOR:
4625 case TargetOpcode::G_LROUND:
4626 case TargetOpcode::G_LLROUND: {
4629 LLT SrcTy = MRI.getType(SrcReg);
4630 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4633 MI.eraseFromParent();
4636 case TargetOpcode::G_INTRINSIC_ROUND:
4638 case TargetOpcode::G_FRINT: {
4641 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4644 case TargetOpcode::G_INTRINSIC_LRINT:
4645 case TargetOpcode::G_INTRINSIC_LLRINT: {
4648 LLT SrcTy = MRI.getType(SrcReg);
4650 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4652 MI.eraseFromParent();
4655 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4656 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4657 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4658 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4659 **
MI.memoperands_begin());
4661 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4662 MI.eraseFromParent();
4665 case TargetOpcode::G_LOAD:
4666 case TargetOpcode::G_SEXTLOAD:
4667 case TargetOpcode::G_ZEXTLOAD:
4669 case TargetOpcode::G_STORE:
4671 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4672 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4673 case TargetOpcode::G_CTLZ:
4674 case TargetOpcode::G_CTTZ:
4675 case TargetOpcode::G_CTPOP:
4676 case TargetOpcode::G_CTLS:
4679 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4681 Register NewRes = MRI.cloneVirtualRegister(Res);
4688 MI.eraseFromParent();
4692 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4693 const LLT CondTy = MRI.getType(CarryOut);
4694 const LLT Ty = MRI.getType(Res);
4696 Register NewRes = MRI.cloneVirtualRegister(Res);
4699 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4705 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4706 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4713 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4718 MI.eraseFromParent();
4722 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4727 MI.eraseFromParent();
4731 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4732 const LLT CondTy = MRI.getType(BorrowOut);
4733 const LLT Ty = MRI.getType(Res);
4736 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4742 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4743 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4750 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4751 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4753 MI.eraseFromParent();
4791 case G_MERGE_VALUES:
4793 case G_UNMERGE_VALUES:
4795 case TargetOpcode::G_SEXT_INREG: {
4796 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4797 int64_t SizeInBits =
MI.getOperand(2).getImm();
4799 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4800 LLT DstTy = MRI.getType(DstReg);
4801 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4804 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4805 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4806 MI.eraseFromParent();
4809 case G_EXTRACT_VECTOR_ELT:
4810 case G_INSERT_VECTOR_ELT:
4812 case G_SHUFFLE_VECTOR:
4814 case G_VECTOR_COMPRESS:
4816 case G_DYN_STACKALLOC:
4820 case G_STACKRESTORE:
4830 case G_READ_REGISTER:
4831 case G_WRITE_REGISTER:
4838 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4839 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4845 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4850 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4861 bool IsSigned =
MI.getOpcode() == G_ABDS;
4862 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4863 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4864 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4888 case G_MEMCPY_INLINE:
4889 return lowerMemcpyInline(
MI);
4900 case G_ATOMICRMW_SUB: {
4901 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
4902 const LLT ValTy = MRI.getType(Val);
4906 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4907 MI.eraseFromParent();
4930 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4934 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4940 Align StackTypeAlign =
4947 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4948 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4953 LLT IdxTy =
B.getMRI()->getType(IdxReg);
4965 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
4968 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
4979 "Converting bits to bytes lost precision");
4985 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4986 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
4988 if (IdxTy != MRI.getType(Index))
4989 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4994 LLT PtrTy = MRI.getType(VecPtr);
4995 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
5003 std::initializer_list<unsigned> NonVecOpIndices) {
5004 if (
MI.getNumMemOperands() != 0)
5021 if (!Ty.isVector()) {
5027 if (Ty.getNumElements() != NumElts)
5042 assert(Ty.isVector() &&
"Expected vector type");
5044 int NumParts, NumLeftover;
5045 std::tie(NumParts, NumLeftover) =
5048 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5049 for (
int i = 0; i < NumParts; ++i) {
5054 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5063 for (
unsigned i = 0; i <
N; ++i) {
5065 Ops.push_back(
Op.getReg());
5066 else if (
Op.isImm())
5067 Ops.push_back(
Op.getImm());
5068 else if (
Op.isPredicate())
5090 std::initializer_list<unsigned> NonVecOpIndices) {
5092 "Non-compatible opcode or not specified non-vector operands");
5093 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5095 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5096 unsigned NumDefs =
MI.getNumDefs();
5104 for (
unsigned i = 0; i < NumDefs; ++i) {
5105 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5113 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5114 ++UseIdx, ++UseNo) {
5117 MI.getOperand(UseIdx));
5126 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5130 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5132 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5133 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5136 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5137 Uses.push_back(InputOpsPieces[InputNo][i]);
5140 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5141 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5146 for (
unsigned i = 0; i < NumDefs; ++i)
5147 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5149 for (
unsigned i = 0; i < NumDefs; ++i)
5150 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5153 MI.eraseFromParent();
5160 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5162 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5163 unsigned NumDefs =
MI.getNumDefs();
5167 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5172 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5173 UseIdx += 2, ++UseNo) {
5181 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5183 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5184 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5186 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5189 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5190 Phi.addUse(InputOpsPieces[j][i]);
5191 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5201 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5203 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5206 MI.eraseFromParent();
5214 const int NumDst =
MI.getNumOperands() - 1;
5215 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5216 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5217 LLT SrcTy = MRI.getType(SrcReg);
5219 if (TypeIdx != 1 || NarrowTy == DstTy)
5226 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5229 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5243 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5244 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5245 const int PartsPerUnmerge = NumDst / NumUnmerge;
5247 for (
int I = 0;
I != NumUnmerge; ++
I) {
5248 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5250 for (
int J = 0; J != PartsPerUnmerge; ++J)
5251 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5252 MIB.addUse(Unmerge.getReg(
I));
5255 MI.eraseFromParent();
5262 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5266 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5268 if (NarrowTy == SrcTy)
5276 assert(SrcTy.isVector() &&
"Expected vector types");
5278 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5292 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5293 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5294 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5300 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5301 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5302 ++i,
Offset += NumNarrowTyElts) {
5305 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5308 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5309 MI.eraseFromParent();
5313 assert(TypeIdx == 0 &&
"Bad type index");
5314 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5329 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5330 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5332 for (
unsigned i = 0; i < NumParts; ++i) {
5334 for (
unsigned j = 0; j < NumElts; ++j)
5335 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5337 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5340 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5341 MI.eraseFromParent();
5349 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5351 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5353 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5355 InsertVal =
MI.getOperand(2).getReg();
5357 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5358 LLT VecTy = MRI.getType(SrcVec);
5364 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5368 MI.eraseFromParent();
5377 SplitPieces[IdxVal] = InsertVal;
5378 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5380 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5384 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5387 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5388 TargetOpcode::G_ANYEXT);
5392 LLT IdxTy = MRI.getType(Idx);
5393 int64_t PartIdx = IdxVal / NewNumElts;
5395 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5398 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5401 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5402 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5403 VecParts[PartIdx] = InsertPart.getReg(0);
5407 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5409 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5413 MI.eraseFromParent();
5433 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5445 LLT ValTy = MRI.getType(ValReg);
5454 int NumLeftover = -1;
5460 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5462 NumParts = NarrowRegs.
size();
5463 NumLeftover = NarrowLeftoverRegs.
size();
5470 LLT PtrTy = MRI.getType(AddrReg);
5480 auto MMO = LdStMI.
getMMO();
5482 unsigned NumParts,
unsigned Offset) ->
unsigned {
5485 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5487 unsigned ByteOffset =
Offset / 8;
5490 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5497 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5498 ValRegs.push_back(Dst);
5499 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5501 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5510 unsigned HandledOffset =
5511 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5515 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5518 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5519 LeftoverTy, NarrowLeftoverRegs);
5533 switch (
MI.getOpcode()) {
5534 case G_IMPLICIT_DEF:
5550 case G_FCANONICALIZE:
5567 case G_INTRINSIC_LRINT:
5568 case G_INTRINSIC_LLRINT:
5569 case G_INTRINSIC_ROUND:
5570 case G_INTRINSIC_ROUNDEVEN:
5573 case G_INTRINSIC_TRUNC:
5601 case G_FMINNUM_IEEE:
5602 case G_FMAXNUM_IEEE:
5624 case G_CTLZ_ZERO_UNDEF:
5626 case G_CTTZ_ZERO_UNDEF:
5643 case G_ADDRSPACE_CAST:
5656 case G_STRICT_FLDEXP:
5658 case G_TRUNC_SSAT_S:
5659 case G_TRUNC_SSAT_U:
5660 case G_TRUNC_USAT_U:
5668 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5673 case G_UNMERGE_VALUES:
5675 case G_BUILD_VECTOR:
5676 assert(TypeIdx == 0 &&
"not a vector type index");
5678 case G_CONCAT_VECTORS:
5682 case G_EXTRACT_VECTOR_ELT:
5683 case G_INSERT_VECTOR_ELT:
5692 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5693 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5695 case G_SHUFFLE_VECTOR:
5701 case G_INTRINSIC_FPTRUNC_ROUND:
5711 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5712 "Not a bitcast operation");
5717 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5719 unsigned NewElemCount =
5722 if (NewElemCount == 1) {
5725 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5732 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5741 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5742 MI.eraseFromParent();
5748 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5752 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5753 MI.getFirst3RegLLTs();
5756 if (DstTy != Src1Ty)
5758 if (DstTy != Src2Ty)
5773 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5789 unsigned InputUsed[2] = {-1U, -1U};
5790 unsigned FirstMaskIdx =
High * NewElts;
5791 bool UseBuildVector =
false;
5792 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5794 int Idx = Mask[FirstMaskIdx + MaskOffset];
5799 if (
Input >= std::size(Inputs)) {
5806 Idx -=
Input * NewElts;
5810 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5811 if (InputUsed[OpNo] ==
Input) {
5814 }
else if (InputUsed[OpNo] == -1U) {
5816 InputUsed[OpNo] =
Input;
5821 if (OpNo >= std::size(InputUsed)) {
5824 UseBuildVector =
true;
5829 Ops.push_back(Idx + OpNo * NewElts);
5832 if (UseBuildVector) {
5837 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5839 int Idx = Mask[FirstMaskIdx + MaskOffset];
5844 if (
Input >= std::size(Inputs)) {
5851 Idx -=
Input * NewElts;
5855 .buildExtractVectorElement(
5856 EltTy, Inputs[
Input],
5862 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5863 }
else if (InputUsed[0] == -1U) {
5865 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5866 }
else if (NewElts == 1) {
5867 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5869 Register Op0 = Inputs[InputUsed[0]];
5873 : Inputs[InputUsed[1]];
5875 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5882 MI.eraseFromParent();
5895 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5901 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5904 const unsigned NumParts =
5906 : SrcTy.getNumElements();
5910 if (DstTy != NarrowTy)
5916 unsigned NumPartsLeft = NumParts;
5917 while (NumPartsLeft > 1) {
5918 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5921 .buildInstr(ScalarOpc, {NarrowTy},
5922 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5925 SplitSrcs = PartialResults;
5926 PartialResults.
clear();
5927 NumPartsLeft = SplitSrcs.
size();
5931 MI.eraseFromParent();
5936 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
5937 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5940 MI.eraseFromParent();
5944 for (
unsigned Part = 0; Part < NumParts; ++Part) {
5946 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5954 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5957 Register Acc = PartialReductions[0];
5958 for (
unsigned Part = 1; Part < NumParts; ++Part) {
5959 if (Part == NumParts - 1) {
5961 {Acc, PartialReductions[Part]});
5964 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5968 MI.eraseFromParent();
5974 unsigned int TypeIdx,
5976 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5977 MI.getFirst3RegLLTs();
5978 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5982 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5983 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5984 "Unexpected vecreduce opcode");
5985 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5986 ? TargetOpcode::G_FADD
5987 : TargetOpcode::G_FMUL;
5990 unsigned NumParts = SrcTy.getNumElements();
5993 for (
unsigned i = 0; i < NumParts; i++)
5994 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5998 MI.eraseFromParent();
6005 unsigned ScalarOpc) {
6013 while (SplitSrcs.
size() > 1) {
6015 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6023 SplitSrcs = std::move(PartialRdxs);
6027 MI.getOperand(1).setReg(SplitSrcs[0]);
6034 const LLT HalfTy,
const LLT AmtTy) {
6036 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6037 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6041 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6042 MI.eraseFromParent();
6048 unsigned VTBits = 2 * NVTBits;
6051 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6052 if (Amt.
ugt(VTBits)) {
6054 }
else if (Amt.
ugt(NVTBits)) {
6057 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6058 }
else if (Amt == NVTBits) {
6066 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6069 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6070 if (Amt.
ugt(VTBits)) {
6072 }
else if (Amt.
ugt(NVTBits)) {
6074 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6076 }
else if (Amt == NVTBits) {
6080 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6082 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6084 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6090 if (Amt.
ugt(VTBits)) {
6092 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6093 }
else if (Amt.
ugt(NVTBits)) {
6095 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6097 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6098 }
else if (Amt == NVTBits) {
6101 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6103 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6105 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6107 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6114 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6115 MI.eraseFromParent();
6131 LLT DstTy = MRI.getType(DstReg);
6136 LLT ShiftAmtTy = MRI.getType(Amt);
6138 if (DstEltSize % 2 != 0)
6154 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6165 const unsigned NewBitSize = DstEltSize / 2;
6177 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6179 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6180 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6183 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6184 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6186 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6191 switch (
MI.getOpcode()) {
6192 case TargetOpcode::G_SHL: {
6194 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6196 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6197 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6198 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6201 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6202 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6204 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6206 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6208 ResultRegs[0] =
Lo.getReg(0);
6209 ResultRegs[1] =
Hi.getReg(0);
6212 case TargetOpcode::G_LSHR:
6213 case TargetOpcode::G_ASHR: {
6215 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6217 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6218 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6219 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6223 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6226 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6227 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6229 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6233 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6235 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6237 ResultRegs[0] =
Lo.getReg(0);
6238 ResultRegs[1] =
Hi.getReg(0);
6245 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6246 MI.eraseFromParent();
6255 LLT TargetTy,
LLT ShiftAmtTy) {
6258 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6260 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6261 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6262 const bool NeedsInterWordShift = ShiftBits != 0;
6265 case TargetOpcode::G_SHL: {
6268 if (PartIdx < ShiftWords)
6271 unsigned SrcIdx = PartIdx - ShiftWords;
6272 if (!NeedsInterWordShift)
6273 return SrcParts[SrcIdx];
6278 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6282 return Hi.getReg(0);
6285 case TargetOpcode::G_LSHR: {
6286 unsigned SrcIdx = PartIdx + ShiftWords;
6287 if (SrcIdx >= NumParts)
6289 if (!NeedsInterWordShift)
6290 return SrcParts[SrcIdx];
6294 if (SrcIdx + 1 < NumParts) {
6295 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6299 return Lo.getReg(0);
6302 case TargetOpcode::G_ASHR: {
6304 unsigned SrcIdx = PartIdx + ShiftWords;
6305 if (SrcIdx >= NumParts)
6307 if (!NeedsInterWordShift)
6308 return SrcParts[SrcIdx];
6313 (SrcIdx == NumParts - 1)
6317 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6339 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6340 ?
static_cast<unsigned>(TargetOpcode::G_LSHR)
6345 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6354 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6355 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6357 auto IsZeroBitShift =
6365 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6366 : TargetOpcode::G_SHL;
6369 auto TargetBitsConst =
6371 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6376 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6381 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6383 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6387 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6400 LLT DstTy = MRI.getType(DstReg);
6404 const unsigned NumParts = DstBits / TargetBits;
6406 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6416 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6417 MI.eraseFromParent();
6422 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6423 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6429 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6433 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6436 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6437 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6441 for (
unsigned I = 0;
I < NumParts; ++
I)
6443 Params, TargetTy, ShiftAmtTy);
6445 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6446 MI.eraseFromParent();
6455 LLT DstTy = MRI.getType(DstReg);
6456 LLT ShiftAmtTy = MRI.getType(AmtReg);
6460 const unsigned NumParts = DstBits / TargetBits;
6462 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6479 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6491 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6492 auto TargetBitsLog2Const =
6493 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6494 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6497 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6499 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6507 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6508 auto TargetBitsMinusOneConst =
6509 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6511 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6512 TargetBitsMinusOneConst)
6515 FillValue = ZeroReg;
6523 for (
unsigned I = 0;
I < NumParts; ++
I) {
6525 Register InBoundsResult = FillValue;
6535 for (
unsigned K = 0; K < NumParts; ++K) {
6536 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6538 WordShift, WordShiftKConst);
6550 switch (
MI.getOpcode()) {
6551 case TargetOpcode::G_SHL:
6552 MainSrcIdx = (int)
I - (
int)K;
6553 CarrySrcIdx = MainSrcIdx - 1;
6555 case TargetOpcode::G_LSHR:
6556 case TargetOpcode::G_ASHR:
6557 MainSrcIdx = (int)
I + (
int)K;
6558 CarrySrcIdx = MainSrcIdx + 1;
6566 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6567 Register MainOp = SrcParts[MainSrcIdx];
6571 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6572 CarryOp = SrcParts[CarrySrcIdx];
6573 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6574 CarrySrcIdx >= (
int)NumParts)
6575 CarryOp = FillValue;
6581 ResultForK = FillValue;
6587 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6594 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6598 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6599 MI.eraseFromParent();
6606 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6609 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6624 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6629 "getNeutralElementForVecReduce called with invalid opcode!");
6630 case TargetOpcode::G_VECREDUCE_ADD:
6631 case TargetOpcode::G_VECREDUCE_OR:
6632 case TargetOpcode::G_VECREDUCE_XOR:
6633 case TargetOpcode::G_VECREDUCE_UMAX:
6635 case TargetOpcode::G_VECREDUCE_MUL:
6637 case TargetOpcode::G_VECREDUCE_AND:
6638 case TargetOpcode::G_VECREDUCE_UMIN:
6641 case TargetOpcode::G_VECREDUCE_SMAX:
6644 case TargetOpcode::G_VECREDUCE_SMIN:
6647 case TargetOpcode::G_VECREDUCE_FADD:
6649 case TargetOpcode::G_VECREDUCE_FMUL:
6651 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6652 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6653 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6654 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6662 unsigned Opc =
MI.getOpcode();
6664 case TargetOpcode::G_IMPLICIT_DEF:
6665 case TargetOpcode::G_LOAD: {
6673 case TargetOpcode::G_STORE:
6680 case TargetOpcode::G_AND:
6681 case TargetOpcode::G_OR:
6682 case TargetOpcode::G_XOR:
6683 case TargetOpcode::G_ADD:
6684 case TargetOpcode::G_SUB:
6685 case TargetOpcode::G_MUL:
6686 case TargetOpcode::G_FADD:
6687 case TargetOpcode::G_FSUB:
6688 case TargetOpcode::G_FMUL:
6689 case TargetOpcode::G_FDIV:
6690 case TargetOpcode::G_FCOPYSIGN:
6691 case TargetOpcode::G_UADDSAT:
6692 case TargetOpcode::G_USUBSAT:
6693 case TargetOpcode::G_SADDSAT:
6694 case TargetOpcode::G_SSUBSAT:
6695 case TargetOpcode::G_SMIN:
6696 case TargetOpcode::G_SMAX:
6697 case TargetOpcode::G_UMIN:
6698 case TargetOpcode::G_UMAX:
6699 case TargetOpcode::G_FMINNUM:
6700 case TargetOpcode::G_FMAXNUM:
6701 case TargetOpcode::G_FMINNUM_IEEE:
6702 case TargetOpcode::G_FMAXNUM_IEEE:
6703 case TargetOpcode::G_FMINIMUM:
6704 case TargetOpcode::G_FMAXIMUM:
6705 case TargetOpcode::G_FMINIMUMNUM:
6706 case TargetOpcode::G_FMAXIMUMNUM:
6707 case TargetOpcode::G_STRICT_FADD:
6708 case TargetOpcode::G_STRICT_FSUB:
6709 case TargetOpcode::G_STRICT_FMUL: {
6717 case TargetOpcode::G_SHL:
6718 case TargetOpcode::G_ASHR:
6719 case TargetOpcode::G_LSHR: {
6725 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6731 case TargetOpcode::G_FMA:
6732 case TargetOpcode::G_STRICT_FMA:
6733 case TargetOpcode::G_FSHR:
6734 case TargetOpcode::G_FSHL: {
6743 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6744 case TargetOpcode::G_EXTRACT:
6751 case TargetOpcode::G_INSERT:
6752 case TargetOpcode::G_INSERT_VECTOR_ELT:
6753 case TargetOpcode::G_FREEZE:
6754 case TargetOpcode::G_FNEG:
6755 case TargetOpcode::G_FABS:
6756 case TargetOpcode::G_FSQRT:
6757 case TargetOpcode::G_FCEIL:
6758 case TargetOpcode::G_FFLOOR:
6759 case TargetOpcode::G_FNEARBYINT:
6760 case TargetOpcode::G_FRINT:
6761 case TargetOpcode::G_INTRINSIC_ROUND:
6762 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6763 case TargetOpcode::G_INTRINSIC_TRUNC:
6764 case TargetOpcode::G_BITREVERSE:
6765 case TargetOpcode::G_BSWAP:
6766 case TargetOpcode::G_FCANONICALIZE:
6767 case TargetOpcode::G_SEXT_INREG:
6768 case TargetOpcode::G_ABS:
6769 case TargetOpcode::G_CTLZ:
6770 case TargetOpcode::G_CTPOP:
6778 case TargetOpcode::G_SELECT: {
6779 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6781 if (!CondTy.isScalar() ||
6787 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6789 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6794 if (CondTy.isVector())
6804 case TargetOpcode::G_UNMERGE_VALUES:
6806 case TargetOpcode::G_PHI:
6808 case TargetOpcode::G_SHUFFLE_VECTOR:
6810 case TargetOpcode::G_BUILD_VECTOR: {
6812 for (
auto Op :
MI.uses()) {
6820 MIRBuilder.buildDeleteTrailingVectorElements(
6821 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6822 MI.eraseFromParent();
6825 case TargetOpcode::G_SEXT:
6826 case TargetOpcode::G_ZEXT:
6827 case TargetOpcode::G_ANYEXT:
6828 case TargetOpcode::G_TRUNC:
6829 case TargetOpcode::G_FPTRUNC:
6830 case TargetOpcode::G_FPEXT:
6831 case TargetOpcode::G_FPTOSI:
6832 case TargetOpcode::G_FPTOUI:
6833 case TargetOpcode::G_FPTOSI_SAT:
6834 case TargetOpcode::G_FPTOUI_SAT:
6835 case TargetOpcode::G_SITOFP:
6836 case TargetOpcode::G_UITOFP: {
6843 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6846 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6854 case TargetOpcode::G_ICMP:
6855 case TargetOpcode::G_FCMP: {
6863 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6868 case TargetOpcode::G_BITCAST: {
6872 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6873 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
6889 case TargetOpcode::G_VECREDUCE_FADD:
6890 case TargetOpcode::G_VECREDUCE_FMUL:
6891 case TargetOpcode::G_VECREDUCE_ADD:
6892 case TargetOpcode::G_VECREDUCE_MUL:
6893 case TargetOpcode::G_VECREDUCE_AND:
6894 case TargetOpcode::G_VECREDUCE_OR:
6895 case TargetOpcode::G_VECREDUCE_XOR:
6896 case TargetOpcode::G_VECREDUCE_SMAX:
6897 case TargetOpcode::G_VECREDUCE_SMIN:
6898 case TargetOpcode::G_VECREDUCE_UMAX:
6899 case TargetOpcode::G_VECREDUCE_UMIN: {
6900 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
6902 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6903 auto NeutralElement = getNeutralElementForVecReduce(
6909 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
6910 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6911 NeutralElement, Idx);
6915 MO.
setReg(NewVec.getReg(0));
6927 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6929 unsigned MaskNumElts = Mask.size();
6930 unsigned SrcNumElts = SrcTy.getNumElements();
6933 if (MaskNumElts == SrcNumElts)
6936 if (MaskNumElts < SrcNumElts) {
6944 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
6945 MI.getOperand(1).getReg(),
6946 MI.getOperand(2).getReg(), NewMask);
6947 MI.eraseFromParent();
6952 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
6953 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6962 MOps1[0] =
MI.getOperand(1).getReg();
6963 MOps2[0] =
MI.getOperand(2).getReg();
6965 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6966 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6970 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
6972 if (Idx >=
static_cast<int>(SrcNumElts))
6973 Idx += PaddedMaskNumElts - SrcNumElts;
6978 if (MaskNumElts != PaddedMaskNumElts) {
6980 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6983 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
6985 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
6990 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6993 MI.eraseFromParent();
6999 unsigned int TypeIdx,
LLT MoreTy) {
7000 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
7002 unsigned NumElts = DstTy.getNumElements();
7005 if (DstTy.isVector() && Src1Ty.isVector() &&
7006 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7014 if (DstTy != Src1Ty || DstTy != Src2Ty)
7022 for (
unsigned I = 0;
I != NumElts; ++
I) {
7024 if (Idx <
static_cast<int>(NumElts))
7027 NewMask[
I] = Idx - NumElts + WidenNumElts;
7031 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7032 MI.getOperand(1).getReg(),
7033 MI.getOperand(2).getReg(), NewMask);
7034 MI.eraseFromParent();
7043 unsigned SrcParts = Src1Regs.
size();
7044 unsigned DstParts = DstRegs.
size();
7046 unsigned DstIdx = 0;
7048 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7049 DstRegs[DstIdx] = FactorSum;
7054 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7056 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7057 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7059 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7063 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7064 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7066 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7076 if (DstIdx != DstParts - 1) {
7077 MachineInstrBuilder Uaddo =
7078 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
7079 FactorSum = Uaddo.
getReg(0);
7080 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7081 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7082 MachineInstrBuilder Uaddo =
7083 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
7084 FactorSum = Uaddo.
getReg(0);
7085 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7086 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7090 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7091 for (
unsigned i = 2; i < Factors.
size(); ++i)
7092 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7095 CarrySumPrevDstIdx = CarrySum;
7096 DstRegs[DstIdx] = FactorSum;
7108 LLT DstType = MRI.getType(DstReg);
7110 if (DstType.isVector())
7113 unsigned Opcode =
MI.getOpcode();
7114 unsigned OpO, OpE, OpF;
7116 case TargetOpcode::G_SADDO:
7117 case TargetOpcode::G_SADDE:
7118 case TargetOpcode::G_UADDO:
7119 case TargetOpcode::G_UADDE:
7120 case TargetOpcode::G_ADD:
7121 OpO = TargetOpcode::G_UADDO;
7122 OpE = TargetOpcode::G_UADDE;
7123 OpF = TargetOpcode::G_UADDE;
7124 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7125 OpF = TargetOpcode::G_SADDE;
7127 case TargetOpcode::G_SSUBO:
7128 case TargetOpcode::G_SSUBE:
7129 case TargetOpcode::G_USUBO:
7130 case TargetOpcode::G_USUBE:
7131 case TargetOpcode::G_SUB:
7132 OpO = TargetOpcode::G_USUBO;
7133 OpE = TargetOpcode::G_USUBE;
7134 OpF = TargetOpcode::G_USUBE;
7135 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7136 OpF = TargetOpcode::G_SSUBE;
7143 unsigned NumDefs =
MI.getNumExplicitDefs();
7144 Register Src1 =
MI.getOperand(NumDefs).getReg();
7145 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7148 CarryDst =
MI.getOperand(1).getReg();
7149 if (
MI.getNumOperands() == NumDefs + 3)
7150 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7152 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7153 LLT LeftoverTy, DummyTy;
7155 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7160 int NarrowParts = Src1Regs.
size();
7161 Src1Regs.
append(Src1Left);
7162 Src2Regs.
append(Src2Left);
7165 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7167 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7170 if (i == e - 1 && CarryDst)
7171 CarryOut = CarryDst;
7173 CarryOut = MRI.createGenericVirtualRegister(
LLT::scalar(1));
7176 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7177 {Src1Regs[i], Src2Regs[i]});
7178 }
else if (i == e - 1) {
7179 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7180 {Src1Regs[i], Src2Regs[i], CarryIn});
7182 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7183 {Src1Regs[i], Src2Regs[i], CarryIn});
7189 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7190 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7191 ArrayRef(DstRegs).drop_front(NarrowParts));
7193 MI.eraseFromParent();
7199 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7201 LLT Ty = MRI.getType(DstReg);
7205 unsigned Size = Ty.getSizeInBits();
7207 if (
Size % NarrowSize != 0)
7210 unsigned NumParts =
Size / NarrowSize;
7211 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7212 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7218 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7222 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7223 MI.eraseFromParent();
7233 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7236 LLT SrcTy = MRI.getType(Src);
7247 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7260 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7263 if (SizeOp1 % NarrowSize != 0)
7265 int NumParts = SizeOp1 / NarrowSize;
7268 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7272 uint64_t OpStart =
MI.getOperand(2).getImm();
7273 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7274 for (
int i = 0; i < NumParts; ++i) {
7275 unsigned SrcStart = i * NarrowSize;
7277 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7280 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7288 int64_t ExtractOffset;
7290 if (OpStart < SrcStart) {
7292 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7294 ExtractOffset = OpStart - SrcStart;
7295 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7299 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7301 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7302 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7309 if (MRI.getType(DstReg).isVector())
7310 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7311 else if (DstRegs.
size() > 1)
7312 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7315 MI.eraseFromParent();
7327 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7329 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7332 SrcRegs.
append(LeftoverRegs);
7336 uint64_t OpStart =
MI.getOperand(3).getImm();
7337 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7338 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7339 unsigned DstStart =
I * NarrowSize;
7341 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7349 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7351 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7355 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7363 int64_t ExtractOffset, InsertOffset;
7365 if (OpStart < DstStart) {
7367 ExtractOffset = DstStart - OpStart;
7368 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7370 InsertOffset = OpStart - DstStart;
7373 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7377 if (ExtractOffset != 0 || SegSize != OpSize) {
7379 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7380 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7383 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7384 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7392 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7395 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7397 MI.eraseFromParent();
7405 LLT DstTy = MRI.getType(DstReg);
7407 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7413 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7414 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7418 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7419 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7422 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7423 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7424 {Src0Regs[I], Src1Regs[I]});
7428 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7431 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7432 DstLeftoverRegs.
push_back(Inst.getReg(0));
7435 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7436 LeftoverTy, DstLeftoverRegs);
7438 MI.eraseFromParent();
7448 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7450 LLT DstTy = MRI.getType(DstReg);
7455 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7456 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7457 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7459 MI.eraseFromParent();
7469 Register CondReg =
MI.getOperand(1).getReg();
7470 LLT CondTy = MRI.getType(CondReg);
7471 if (CondTy.isVector())
7475 LLT DstTy = MRI.getType(DstReg);
7481 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7482 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7486 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7487 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7490 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7492 CondReg, Src1Regs[
I], Src2Regs[
I]);
7496 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7498 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7502 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7503 LeftoverTy, DstLeftoverRegs);
7505 MI.eraseFromParent();
7515 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7518 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7519 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7522 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7524 auto C_0 =
B.buildConstant(NarrowTy, 0);
7526 UnmergeSrc.getReg(1), C_0);
7527 auto LoCTLZ = IsUndef ?
7528 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7529 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7530 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7531 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7532 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7533 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7535 MI.eraseFromParent();
7548 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7551 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7552 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7555 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7557 auto C_0 =
B.buildConstant(NarrowTy, 0);
7559 UnmergeSrc.getReg(0), C_0);
7560 auto HiCTTZ = IsUndef ?
7561 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7562 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7563 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7564 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7565 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7566 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7568 MI.eraseFromParent();
7581 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7584 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7589 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7593 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7594 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7602 auto LoInv =
B.buildXor(DstTy,
Lo, Sign);
7603 auto LoCTLZ =
B.buildCTLZ(DstTy, LoInv);
7606 auto C_NarrowSizeM1 =
B.buildConstant(DstTy, NarrowSize - 1);
7607 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7609 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7611 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7613 MI.eraseFromParent();
7623 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7626 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7627 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7629 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7630 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7631 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7633 MI.eraseFromParent();
7648 LLT ExpTy = MRI.getType(ExpReg);
7653 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7654 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7655 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7656 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7658 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7660 MI.getOperand(2).setReg(Trunc.getReg(0));
7667 unsigned Opc =
MI.getOpcode();
7670 auto QAction = LI.getAction(Q).Action;
7676 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7679 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7683 case TargetOpcode::G_CTLZ: {
7684 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7685 unsigned Len = SrcTy.getScalarSizeInBits();
7687 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7689 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7690 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7693 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7694 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7695 MI.eraseFromParent();
7711 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7712 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7715 Op = MIBOp.getReg(0);
7720 MI.eraseFromParent();
7723 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7726 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7730 case TargetOpcode::G_CTTZ: {
7731 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7733 unsigned Len = SrcTy.getScalarSizeInBits();
7734 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7737 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7738 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7741 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7742 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7743 MI.eraseFromParent();
7750 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7751 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7753 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7754 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7755 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7756 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7759 MI.eraseFromParent();
7763 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7764 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7768 case TargetOpcode::G_CTPOP: {
7770 LLT Ty = MRI.getType(SrcReg);
7771 unsigned Size = Ty.getScalarSizeInBits();
7783 auto C_1 =
B.buildConstant(Ty, 1);
7784 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7786 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7787 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7788 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7792 auto C_2 =
B.buildConstant(Ty, 2);
7793 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7795 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7796 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7797 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7798 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7805 auto C_4 =
B.buildConstant(Ty, 4);
7806 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7807 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7809 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7810 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7812 assert(
Size <= 128 &&
"Scalar size is too large for CTPOP lower algorithm");
7815 if (
Size == 16 && !Ty.isVector()) {
7817 auto C_8 =
B.buildConstant(Ty, 8);
7818 auto HighSum =
B.buildLShr(Ty, B8Count, C_8);
7819 auto Res =
B.buildAdd(Ty, B8Count, HighSum);
7820 B.buildAnd(
MI.getOperand(0).getReg(), Res,
B.buildConstant(Ty, 0xFF));
7821 MI.eraseFromParent();
7830 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7832 auto IsMulSupported = [
this](
const LLT Ty) {
7833 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7836 if (IsMulSupported(Ty)) {
7837 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7838 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7840 auto ResTmp = B8Count;
7841 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7842 auto ShiftC =
B.buildConstant(Ty, Shift);
7843 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7844 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7846 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7848 MI.eraseFromParent();
7851 case TargetOpcode::G_CTLS: {
7852 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7856 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7857 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7859 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7865 MI.eraseFromParent();
7886 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7887 LLT Ty = MRI.getType(Dst);
7888 LLT ShTy = MRI.getType(Z);
7895 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7896 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7901 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
7902 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7906 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7919 MI.eraseFromParent();
7925 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7926 LLT Ty = MRI.getType(Dst);
7927 LLT ShTy = MRI.getType(Z);
7930 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7940 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
7941 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7942 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7943 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7944 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7948 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
7951 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7954 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7956 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
7957 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7958 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7961 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7963 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
7965 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7968 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7969 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
7974 MI.eraseFromParent();
7985 LLT Ty = MRI.getType(Dst);
7986 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
7988 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7989 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7992 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
7993 return lowerFunnelShiftAsShifts(
MI);
7997 if (Result == UnableToLegalize)
7998 return lowerFunnelShiftAsShifts(
MI);
8003 auto [Dst, Src] =
MI.getFirst2Regs();
8004 LLT DstTy = MRI.getType(Dst);
8005 LLT SrcTy = MRI.getType(Src);
8009 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8017 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8021 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8025 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8030 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8031 {UnmergeSrc.getReg(0)});
8032 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8033 {UnmergeSrc.getReg(1)});
8036 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8038 MI.eraseFromParent();
8055 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8059 LLT DstTy = MRI.getType(DstReg);
8060 LLT SrcTy = MRI.getType(SrcReg);
8068 SrcTy.getElementCount().divideCoefficientBy(2));
8081 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8093 MI.eraseFromParent();
8102 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8103 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8104 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8105 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8106 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8107 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8108 MI.eraseFromParent();
8113 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8115 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8116 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8121 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8122 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8124 return lowerRotateWithReverseRotate(
MI);
8127 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8128 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8129 bool IsFShLegal =
false;
8130 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8131 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8135 MI.eraseFromParent();
8140 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8143 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8148 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8149 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8150 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8156 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8157 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8159 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8165 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8166 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8168 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8170 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8175 MI.eraseFromParent();
8183 auto [Dst, Src] =
MI.getFirst2Regs();
8188 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8216 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8229 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8233 MI.eraseFromParent();
8241 auto [Dst, Src] =
MI.getFirst2Regs();
8246 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8259 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8261 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8266 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8268 MI.eraseFromParent();
8276 auto [Dst, Src] =
MI.getFirst2Regs();
8280 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S64);
8291 auto TwoP52 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4330000000000000));
8292 auto TwoP84 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4530000000000000));
8294 auto TwoP52P84FP =
MIRBuilder.buildFConstant(
S64, TwoP52P84);
8301 auto HighBitsFP =
MIRBuilder.buildOr(
S64, TwoP84, HighBits);
8302 auto Scratch =
MIRBuilder.buildFSub(
S64, HighBitsFP, TwoP52P84FP);
8303 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8305 MI.eraseFromParent();
8315 auto M1 =
MI.getOpcode() == TargetOpcode::G_UITOFP
8321 MI.eraseFromParent();
8326 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8329 auto True =
MIRBuilder.buildFConstant(DstTy, 1.0);
8330 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8331 MIRBuilder.buildSelect(Dst, Src, True, False);
8332 MI.eraseFromParent();
8336 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8356 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8363 auto True =
MIRBuilder.buildFConstant(DstTy, -1.0);
8364 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8365 MIRBuilder.buildSelect(Dst, Src, True, False);
8366 MI.eraseFromParent();
8370 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8393 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8394 MI.eraseFromParent();
8402 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8406 if (SrcTy !=
S64 && SrcTy !=
S32)
8408 if (DstTy !=
S32 && DstTy !=
S64)
8435 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8437 MI.eraseFromParent();
8442 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8447 if (SrcTy.getScalarType() !=
S32 || DstTy.getScalarType() !=
S64)
8454 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8456 auto ExponentMask =
MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8457 auto ExponentLoBit =
MIRBuilder.buildConstant(SrcTy, 23);
8459 auto AndExpMask =
MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8460 auto ExponentBits =
MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8462 auto SignMask =
MIRBuilder.buildConstant(SrcTy,
8464 auto AndSignMask =
MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8465 auto SignLowBit =
MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8466 auto Sign =
MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8469 auto MantissaMask =
MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8470 auto AndMantissaMask =
MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8471 auto K =
MIRBuilder.buildConstant(SrcTy, 0x00800000);
8473 auto R =
MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8476 auto Bias =
MIRBuilder.buildConstant(SrcTy, 127);
8481 auto Shl =
MIRBuilder.buildShl(DstTy, R, SubExponent);
8482 auto Srl =
MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8488 R =
MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8490 auto XorSign =
MIRBuilder.buildXor(DstTy, R, Sign);
8491 auto Ret =
MIRBuilder.buildSub(DstTy, XorSign, Sign);
8493 auto ZeroSrcTy =
MIRBuilder.buildConstant(SrcTy, 0);
8498 auto ZeroDstTy =
MIRBuilder.buildConstant(DstTy, 0);
8499 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8501 MI.eraseFromParent();
8507 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8509 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8510 unsigned SatWidth = DstTy.getScalarSizeInBits();
8514 APInt MinInt, MaxInt;
8537 if (AreExactFloatBounds) {
8539 auto MaxC =
MIRBuilder.buildFConstant(SrcTy, MinFloat);
8541 SrcTy.changeElementSize(1), Src, MaxC);
8542 auto Max =
MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8544 auto MinC =
MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8554 MI.eraseFromParent();
8559 auto FpToInt =
MIRBuilder.buildFPTOSI(DstTy, Min);
8561 DstTy.changeElementSize(1), Src, Src);
8564 MI.eraseFromParent();
8571 auto FpToInt = IsSigned ?
MIRBuilder.buildFPTOSI(DstTy, Src)
8580 DstTy, ULT,
MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8591 MI.eraseFromParent();
8597 DstTy, OGT,
MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8599 DstTy.changeElementSize(1), Src, Src);
8601 MI.eraseFromParent();
8611 auto [Dst, Src] =
MI.getFirst2Regs();
8613 MRI.getType(Src).getScalarType() ==
LLT::scalar(64));
8615 if (MRI.getType(Src).isVector())
8619 unsigned Flags =
MI.getFlags();
8622 MI.eraseFromParent();
8626 const unsigned ExpMask = 0x7ff;
8627 const unsigned ExpBiasf64 = 1023;
8628 const unsigned ExpBiasf16 = 15;
8657 auto SelectCC =
MIRBuilder.buildSelect(
S32, CmpM_NE0, Bits0x200, Zero);
8717 MI.eraseFromParent();
8723 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8727 if (DstTy.getScalarType() ==
S16 && SrcTy.getScalarType() ==
S64)
8734 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8735 LLT Ty = MRI.getType(Dst);
8737 auto CvtSrc1 =
MIRBuilder.buildSITOFP(Ty, Src1);
8738 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1,
MI.getFlags());
8739 MI.eraseFromParent();
8744 auto [DstFrac, DstInt, Src] =
MI.getFirst3Regs();
8745 LLT Ty = MRI.getType(Src);
8746 auto Flags =
MI.getFlags();
8753 FracToUse = FracPart.getReg(0);
8755 auto Abs =
MIRBuilder.buildFAbs(Ty, Src, Flags);
8759 Ty.changeElementSize(1), Abs, Inf);
8760 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
8762 FracToUse =
Select.getReg(0);
8765 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8768 MI.eraseFromParent();
8774 case TargetOpcode::G_SMIN:
8776 case TargetOpcode::G_SMAX:
8778 case TargetOpcode::G_UMIN:
8780 case TargetOpcode::G_UMAX:
8788 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8793 auto Cmp =
MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8794 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8796 MI.eraseFromParent();
8805 LLT DstTy = MRI.getType(Dst);
8806 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8816 auto Zero =
MIRBuilder.buildConstant(DstTy, 0);
8817 auto IsGT =
MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8819 auto IsLT =
MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8822 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
8823 auto BC = TLI.getBooleanContents(DstTy.
isVector(),
false);
8824 if (TLI.preferSelectsOverBooleanArithmetic(
8827 auto One =
MIRBuilder.buildConstant(DstTy, 1);
8828 auto SelectZeroOrOne =
MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8830 auto MinusOne =
MIRBuilder.buildConstant(DstTy, -1);
8831 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8837 unsigned BoolExtOp =
8839 IsGT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8840 IsLT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8844 MI.eraseFromParent();
8850 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
8851 const int Src0Size = Src0Ty.getScalarSizeInBits();
8852 const int Src1Size = Src1Ty.getScalarSizeInBits();
8857 auto NotSignBitMask =
MIRBuilder.buildConstant(
8862 if (Src0Ty == Src1Ty) {
8863 And1 =
MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8864 }
else if (Src0Size > Src1Size) {
8865 auto ShiftAmt =
MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8866 auto Zext =
MIRBuilder.buildZExt(Src0Ty, Src1);
8867 auto Shift =
MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8868 And1 =
MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8870 auto ShiftAmt =
MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8871 auto Shift =
MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8872 auto Trunc =
MIRBuilder.buildTrunc(Src0Ty, Shift);
8873 And1 =
MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8879 unsigned Flags =
MI.getFlags();
8886 MI.eraseFromParent();
8897 switch (
MI.getOpcode()) {
8898 case TargetOpcode::G_FMINNUM:
8899 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8901 case TargetOpcode::G_FMINIMUMNUM:
8902 NewOp = TargetOpcode::G_FMINNUM;
8904 case TargetOpcode::G_FMAXNUM:
8905 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8907 case TargetOpcode::G_FMAXIMUMNUM:
8908 NewOp = TargetOpcode::G_FMAXNUM;
8914 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8915 LLT Ty = MRI.getType(Dst);
8925 Src0 =
MIRBuilder.buildFCanonicalize(Ty, Src0,
MI.getFlags()).getReg(0);
8928 Src1 =
MIRBuilder.buildFCanonicalize(Ty, Src1,
MI.getFlags()).getReg(0);
8933 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1},
MI.getFlags());
8934 MI.eraseFromParent();
8940 unsigned Opc =
MI.getOpcode();
8941 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8942 LLT Ty = MRI.getType(Dst);
8945 bool IsMax = (
Opc == TargetOpcode::G_FMAXIMUM);
8947 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8948 unsigned OpcNonIeee =
8949 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8950 bool MinMaxMustRespectOrderedZero =
false;
8954 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8956 MinMaxMustRespectOrderedZero =
true;
8957 }
else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8962 Res =
MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8970 LLT ElementTy = Ty.
isScalar() ? Ty : Ty.getElementType();
8974 NaN =
MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8976 Res =
MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8986 const unsigned Flags =
MI.getFlags();
8992 auto LHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8994 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8996 auto RHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8998 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9000 Res =
MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9005 MI.eraseFromParent();
9012 LLT Ty = MRI.getType(DstReg);
9013 unsigned Flags =
MI.getFlags();
9018 MI.eraseFromParent();
9024 auto [DstReg,
X] =
MI.getFirst2Regs();
9025 const unsigned Flags =
MI.getFlags();
9026 const LLT Ty = MRI.getType(DstReg);
9038 auto AbsDiff =
MIRBuilder.buildFAbs(Ty, Diff, Flags);
9040 auto Half =
MIRBuilder.buildFConstant(Ty, 0.5);
9045 auto One =
MIRBuilder.buildFConstant(Ty, 1.0);
9046 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9047 auto BoolFP =
MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9048 auto SignedOffset =
MIRBuilder.buildFCopysign(Ty, BoolFP,
X);
9050 MIRBuilder.buildFAdd(DstReg,
T, SignedOffset, Flags);
9052 MI.eraseFromParent();
9057 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
9058 unsigned Flags =
MI.getFlags();
9059 LLT Ty = MRI.getType(DstReg);
9066 auto Trunc =
MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9067 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9070 SrcReg, Zero, Flags);
9072 SrcReg, Trunc, Flags);
9076 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9077 MI.eraseFromParent();
9083 const unsigned NumOps =
MI.getNumOperands();
9084 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
9085 unsigned PartSize = Src0Ty.getSizeInBits();
9090 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
9091 const unsigned Offset = (
I - 1) * PartSize;
9094 auto ZextInput =
MIRBuilder.buildZExt(WideTy, SrcReg);
9097 MRI.createGenericVirtualRegister(WideTy);
9100 auto Shl =
MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9101 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9102 ResultReg = NextResult;
9105 if (DstTy.isPointer()) {
9106 if (
MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9107 DstTy.getAddressSpace())) {
9115 MI.eraseFromParent();
9121 const unsigned NumDst =
MI.getNumOperands() - 1;
9122 Register SrcReg =
MI.getOperand(NumDst).getReg();
9123 Register Dst0Reg =
MI.getOperand(0).getReg();
9124 LLT DstTy = MRI.getType(Dst0Reg);
9133 LLT IntTy = MRI.getType(SrcReg);
9138 unsigned Offset = DstSize;
9139 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
9141 auto Shift =
MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9145 MI.eraseFromParent();
9164 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9165 InsertVal =
MI.getOperand(2).getReg();
9167 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
9169 LLT VecTy = MRI.getType(SrcVec);
9179 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
9180 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9182 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9185 MI.eraseFromParent();
9190 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
9201 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9208 int64_t
Offset = IdxVal * EltBytes;
9219 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9222 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9224 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9227 MI.eraseFromParent();
9233 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9234 MI.getFirst3RegLLTs();
9244 for (
int Idx : Mask) {
9246 if (!
Undef.isValid())
9252 assert(!Src0Ty.isScalar() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9254 int NumElts = Src0Ty.getNumElements();
9255 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9256 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9257 auto [It, Inserted] = CachedExtract.
try_emplace(Idx);
9259 auto IdxK =
MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9261 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9266 assert(DstTy.isVector() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9267 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9268 MI.eraseFromParent();
9274 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9275 MI.getFirst4RegLLTs();
9277 if (VecTy.isScalableVector())
9293 auto OutPos =
MIRBuilder.buildConstant(IdxTy, 0);
9296 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9299 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9302 std::optional<APInt> PassthruSplatVal =
9305 if (PassthruSplatVal.has_value()) {
9307 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9308 }
else if (HasPassthru) {
9309 auto Popcount =
MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9310 Popcount =
MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9316 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9320 unsigned NumElmts = VecTy.getNumElements();
9321 for (
unsigned I = 0;
I < NumElmts; ++
I) {
9323 auto Val =
MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9326 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9329 auto MaskI =
MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9334 OutPos =
MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9336 if (HasPassthru &&
I == NumElmts - 1) {
9339 auto AllLanesSelected =
MIRBuilder.buildICmp(
9341 OutPos =
MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9342 {OutPos, EndOfVector});
9346 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9348 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9353 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9355 MI.eraseFromParent();
9366 SPTmp =
MIRBuilder.buildCast(IntPtrTy, SPTmp);
9372 if (Alignment >
Align(1)) {
9375 auto AlignCst =
MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9384 const auto &MF = *
MI.getMF();
9390 Register AllocSize =
MI.getOperand(1).getReg();
9393 LLT PtrTy = MRI.getType(Dst);
9394 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9401 MI.eraseFromParent();
9407 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9412 MI.eraseFromParent();
9418 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9423 MI.eraseFromParent();
9429 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9430 unsigned Offset =
MI.getOperand(2).getImm();
9433 if (SrcTy.isVector()) {
9434 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9435 unsigned DstSize = DstTy.getSizeInBits();
9437 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9438 (
Offset + DstSize <= SrcTy.getSizeInBits())) {
9440 auto Unmerge =
MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9444 for (
unsigned Idx =
Offset / SrcEltSize;
9445 Idx < (
Offset + DstSize) / SrcEltSize; ++Idx) {
9446 SubVectorElts.
push_back(Unmerge.getReg(Idx));
9448 if (SubVectorElts.
size() == 1)
9449 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9451 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9453 MI.eraseFromParent();
9459 if ((SrcTy.isPointer() &&
9460 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9461 (DstTy.isPointer() &&
9462 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9463 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9467 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9468 (SrcTy.isScalar() || SrcTy.isPointer() ||
9469 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9470 LLT SrcIntTy = SrcTy;
9471 if (!SrcTy.isScalar()) {
9473 SrcReg =
MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9477 if (DstTy.isPointer())
9479 MRI.createGenericVirtualRegister(
LLT::scalar(DstTy.getSizeInBits()));
9485 auto Shr =
MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9489 if (DstTy.isPointer())
9492 MI.eraseFromParent();
9500 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
9503 LLT DstTy = MRI.getType(Src);
9504 LLT InsertTy = MRI.getType(InsertSrc);
9507 bool IsNonIntegralInsert =
9517 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9518 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9525 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9527 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, Src);
9531 for (; Idx <
Offset / EltSize; ++Idx) {
9532 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9537 auto UnmergeInsertSrc =
MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9538 for (
unsigned i = 0; Idx < (
Offset + InsertSize) / EltSize;
9540 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
9544 InsertSrc =
MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9546 InsertSrc =
MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9553 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9556 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9557 MI.eraseFromParent();
9566 if (IsNonIntegralDst || IsNonIntegralInsert) {
9567 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9571 LLT IntDstTy = DstTy;
9575 Src =
MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9580 InsertSrc =
MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9586 ExtInsSrc =
MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9592 auto Mask =
MIRBuilder.buildConstant(IntDstTy, MaskVal);
9593 auto MaskedSrc =
MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9594 auto Or =
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9597 MI.eraseFromParent();
9603 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9604 MI.getFirst4RegLLTs();
9605 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
9608 LLT BoolTy = Dst1Ty;
9610 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9627 auto ResultLowerThanLHS =
9632 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9635 MI.eraseFromParent();
9641 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9642 const LLT Ty = MRI.getType(Res);
9645 auto Tmp =
MIRBuilder.buildAdd(Ty, LHS, RHS);
9646 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9647 auto Sum =
MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9658 MI.eraseFromParent();
9663 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9664 const LLT Ty = MRI.getType(Res);
9667 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9668 auto RHSPlusCI =
MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9669 auto Diff =
MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9674 auto X2 =
MIRBuilder.buildXor(Ty, LHS, Diff);
9679 MI.eraseFromParent();
9685 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9686 LLT Ty = MRI.getType(Res);
9690 switch (
MI.getOpcode()) {
9693 case TargetOpcode::G_UADDSAT:
9696 BaseOp = TargetOpcode::G_ADD;
9698 case TargetOpcode::G_SADDSAT:
9701 BaseOp = TargetOpcode::G_ADD;
9703 case TargetOpcode::G_USUBSAT:
9706 BaseOp = TargetOpcode::G_SUB;
9708 case TargetOpcode::G_SSUBSAT:
9711 BaseOp = TargetOpcode::G_SUB;
9726 uint64_t NumBits = Ty.getScalarSizeInBits();
9737 auto NegOne =
MIRBuilder.buildConstant(Ty, -1);
9745 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9750 auto Min =
MIRBuilder.buildUMin(Ty, Not, RHS);
9751 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9754 MI.eraseFromParent();
9760 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9761 LLT Ty = MRI.getType(Res);
9765 unsigned OverflowOp;
9766 switch (
MI.getOpcode()) {
9769 case TargetOpcode::G_UADDSAT:
9772 OverflowOp = TargetOpcode::G_UADDO;
9774 case TargetOpcode::G_SADDSAT:
9777 OverflowOp = TargetOpcode::G_SADDO;
9779 case TargetOpcode::G_USUBSAT:
9782 OverflowOp = TargetOpcode::G_USUBO;
9784 case TargetOpcode::G_SSUBSAT:
9787 OverflowOp = TargetOpcode::G_SSUBO;
9792 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9793 Register Tmp = OverflowRes.getReg(0);
9794 Register Ov = OverflowRes.getReg(1);
9803 uint64_t NumBits = Ty.getScalarSizeInBits();
9804 auto ShiftAmount =
MIRBuilder.buildConstant(Ty, NumBits - 1);
9805 auto Sign =
MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9808 Clamp =
MIRBuilder.buildAdd(Ty, Sign, MinVal);
9816 Clamp =
MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9820 MI.eraseFromParent();
9826 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9827 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9828 "Expected shlsat opcode!");
9829 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9830 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9831 LLT Ty = MRI.getType(Res);
9835 auto Result =
MIRBuilder.buildShl(Ty, LHS, RHS);
9836 auto Orig = IsSigned ?
MIRBuilder.buildAShr(Ty, Result, RHS)
9845 SatVal =
MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9850 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9852 MI.eraseFromParent();
9857 auto [Dst, Src] =
MI.getFirst2Regs();
9858 const LLT Ty = MRI.getType(Src);
9859 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9860 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9863 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9864 auto LSByteShiftedLeft =
MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9865 auto MSByteShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9866 auto Res =
MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9869 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
9871 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9872 auto Mask =
MIRBuilder.buildConstant(Ty, APMask);
9873 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9875 auto LoByte =
MIRBuilder.buildAnd(Ty, Src, Mask);
9876 auto LoShiftedLeft =
MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9877 Res =
MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9879 auto SrcShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9880 auto HiShiftedRight =
MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9881 Res =
MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9883 Res.getInstr()->getOperand(0).setReg(Dst);
9885 MI.eraseFromParent();
9892 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
9895 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9896 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9897 return B.buildOr(Dst,
LHS,
RHS);
9902 auto [Dst, Src] =
MI.getFirst2Regs();
9903 const LLT SrcTy = MRI.getType(Src);
9904 unsigned Size = SrcTy.getScalarSizeInBits();
9905 unsigned VSize = SrcTy.getSizeInBits();
9908 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9909 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9910 {LLT::fixed_vector(VSize / 8, 8),
9911 LLT::fixed_vector(VSize / 8, 8)}}))) {
9916 auto BSWAP =
MIRBuilder.buildBSwap(SrcTy, Src);
9917 auto Cast =
MIRBuilder.buildBitcast(VTy, BSWAP);
9918 auto RBIT =
MIRBuilder.buildBitReverse(VTy, Cast);
9922 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9945 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
9949 Tmp2 = MIRBuilder.
buildShl(SrcTy, Src, ShAmt);
9952 Tmp2 = MIRBuilder.
buildLShr(SrcTy, Src, ShAmt);
9956 Tmp2 = MIRBuilder.
buildAnd(SrcTy, Tmp2, Mask);
9960 Tmp = MIRBuilder.
buildOr(SrcTy, Tmp, Tmp2);
9965 MI.eraseFromParent();
9973 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9974 int NameOpIdx = IsRead ? 1 : 0;
9975 int ValRegIndex = IsRead ? 0 : 1;
9977 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
9978 const LLT Ty = MRI.getType(ValReg);
9980 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9987 (IsRead ?
"llvm.read_register" :
"llvm.write_register"),
9988 Fn,
MI.getDebugLoc()));
9992 MI.eraseFromParent();
10001 MI.eraseFromParent();
10007 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
10008 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10009 Register Result =
MI.getOperand(0).getReg();
10010 LLT OrigTy = MRI.getType(Result);
10014 auto LHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(1)});
10015 auto RHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(2)});
10017 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10019 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, SizeInBits);
10020 auto Shifted =
MIRBuilder.buildInstr(ShiftOp, {WideTy}, {
Mul, ShiftAmt});
10023 MI.eraseFromParent();
10029 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10034 MI.eraseFromParent();
10039 MI.eraseFromParent();
10046 unsigned BitSize = SrcTy.getScalarSizeInBits();
10050 auto AsInt =
MIRBuilder.buildCopy(IntTy, SrcReg);
10056 APInt ExpMask = Inf;
10058 APInt QNaNBitMask =
10062 auto SignBitC =
MIRBuilder.buildConstant(IntTy, SignBit);
10063 auto ValueMaskC =
MIRBuilder.buildConstant(IntTy, ValueMask);
10064 auto InfC =
MIRBuilder.buildConstant(IntTy, Inf);
10065 auto ExpMaskC =
MIRBuilder.buildConstant(IntTy, ExpMask);
10066 auto ZeroC =
MIRBuilder.buildConstant(IntTy, 0);
10068 auto Abs =
MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10072 auto Res =
MIRBuilder.buildConstant(DstTy, 0);
10074 LLT DstTyCopy = DstTy;
10076 Res =
MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10104 auto ExpBits =
MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10107 Mask &= ~PartialCheck;
10116 else if (PartialCheck ==
fcZero)
10128 auto OneC =
MIRBuilder.buildConstant(IntTy, 1);
10129 auto VMinusOne =
MIRBuilder.buildSub(IntTy, V, OneC);
10130 auto SubnormalRes =
10132 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10134 SubnormalRes =
MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10135 appendToRes(SubnormalRes);
10142 else if (PartialCheck ==
fcInf)
10147 auto NegInfC =
MIRBuilder.buildConstant(IntTy, NegInf);
10154 auto InfWithQnanBitC =
MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10155 if (PartialCheck ==
fcNan) {
10159 }
else if (PartialCheck ==
fcQNan) {
10169 Abs, InfWithQnanBitC);
10170 appendToRes(
MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10177 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
10179 IntTy, Abs,
MIRBuilder.buildConstant(IntTy, ExpLSB));
10180 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10183 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10185 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10188 DstTy, Sign,
MIRBuilder.buildConstant(DstTy, InversionMask));
10189 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10191 appendToRes(NormalRes);
10195 MI.eraseFromParent();
10201 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10202 MI.getFirst4RegLLTs();
10204 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10208 Op1Reg =
MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10209 Op2Reg =
MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10213 if (MaskTy.isScalar()) {
10221 MaskElt =
MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10225 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10227 if (DstTy.isVector()) {
10229 auto ShufSplat =
MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10230 MaskReg = ShufSplat.getReg(0);
10235 }
else if (!DstTy.isVector()) {
10240 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10244 auto NotMask =
MIRBuilder.buildNot(MaskTy, MaskReg);
10245 auto NewOp1 =
MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10246 auto NewOp2 =
MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10253 MI.eraseFromParent();
10259 unsigned Opcode =
MI.getOpcode();
10262 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10263 : TargetOpcode::G_UDIV,
10264 {
MI.getOperand(0).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10266 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10267 : TargetOpcode::G_UREM,
10268 {
MI.getOperand(1).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10269 MI.eraseFromParent();
10279 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
10283 auto Shift =
MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10286 MI.eraseFromParent();
10296 Register SrcReg =
MI.getOperand(1).getReg();
10297 LLT Ty = MRI.getType(SrcReg);
10298 auto Zero =
MIRBuilder.buildConstant(Ty, 0);
10301 MI.eraseFromParent();
10307 Register SrcReg =
MI.getOperand(1).getReg();
10308 Register DestReg =
MI.getOperand(0).getReg();
10310 auto Zero =
MIRBuilder.buildConstant(Ty, 0).getReg(0);
10311 auto Sub =
MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10314 MI.eraseFromParent();
10320 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10321 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10322 "Expected G_ABDS or G_ABDU instruction");
10324 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10325 LLT Ty = MRI.getType(LHS);
10335 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10337 MI.eraseFromParent();
10343 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10344 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10345 "Expected G_ABDS or G_ABDU instruction");
10347 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10348 LLT Ty = MRI.getType(LHS);
10353 if (
MI.getOpcode() == TargetOpcode::G_ABDS) {
10354 MaxReg =
MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10355 MinReg =
MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10357 MaxReg =
MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10358 MinReg =
MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10360 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10362 MI.eraseFromParent();
10367 Register SrcReg =
MI.getOperand(1).getReg();
10368 Register DstReg =
MI.getOperand(0).getReg();
10370 LLT Ty = MRI.getType(DstReg);
10378 MI.eraseFromParent();
10384 Register SrcReg =
MI.getOperand(1).getReg();
10385 LLT SrcTy = MRI.getType(SrcReg);
10386 LLT DstTy = MRI.getType(SrcReg);
10389 if (SrcTy.isScalar()) {
10394 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::COPY));
10405 Register ListPtr =
MI.getOperand(1).getReg();
10406 LLT PtrTy = MRI.getType(ListPtr);
10413 auto VAList =
MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10415 const Align A(
MI.getOperand(2).getImm());
10417 if (
A > TLI.getMinStackArgumentAlignment()) {
10419 MIRBuilder.buildConstant(PtrTyAsScalarTy,
A.value() - 1).getReg(0);
10420 auto AddDst =
MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10421 auto AndDst =
MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst,
Log2(
A));
10422 VAList = AndDst.getReg(0);
10429 LLT LLTTy = MRI.getType(Dst);
10432 MIRBuilder.buildConstant(PtrTyAsScalarTy,
DL.getTypeAllocSize(Ty));
10433 auto Succ =
MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10438 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10440 Align EltAlignment =
DL.getABITypeAlign(Ty);
10443 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10445 MI.eraseFromParent();
10460 unsigned Limit,
const MemOp &
Op,
10461 unsigned DstAS,
unsigned SrcAS,
10462 const AttributeList &FuncAttributes,
10464 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
10474 if (
Op.isFixedDstAlign())
10475 while (
Op.getDstAlign() < Ty.getSizeInBytes() &&
10478 assert(Ty.getSizeInBits() > 0 &&
"Could not find valid type");
10482 unsigned NumMemOps = 0;
10485 unsigned TySize = Ty.getSizeInBytes();
10486 while (TySize >
Size) {
10495 assert(NewTySize > 0 &&
"Could not find appropriate type");
10502 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
10504 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
10510 TySize = NewTySize;
10514 if (++NumMemOps > Limit)
10517 MemOps.push_back(Ty);
10527 unsigned NumBits = Ty.getScalarSizeInBits();
10529 if (!Ty.isVector() && ValVRegAndVal) {
10530 APInt Scalar = ValVRegAndVal->Value.
trunc(8);
10538 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10559 uint64_t KnownLen,
Align Alignment,
10561 auto &MF = *
MI.getParent()->getParent();
10566 assert(KnownLen != 0 &&
"Have a zero length memset length!");
10568 bool DstAlignCanChange =
false;
10572 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10574 DstAlignCanChange =
true;
10576 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10577 std::vector<LLT> MemOps;
10579 const auto &DstMMO = **
MI.memoperands_begin();
10580 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10583 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10594 if (DstAlignCanChange) {
10597 Align NewAlign =
DL.getABITypeAlign(IRTy);
10598 if (NewAlign > Alignment) {
10599 Alignment = NewAlign;
10607 MachineIRBuilder MIB(
MI);
10609 LLT LargestTy = MemOps[0];
10610 for (
unsigned i = 1; i < MemOps.size(); i++)
10612 LargestTy = MemOps[i];
10624 LLT PtrTy = MRI.getType(Dst);
10625 unsigned DstOff = 0;
10626 unsigned Size = KnownLen;
10627 for (
unsigned I = 0;
I < MemOps.size();
I++) {
10628 LLT Ty = MemOps[
I];
10630 if (TySize >
Size) {
10633 assert(
I == MemOps.size() - 1 &&
I != 0);
10634 DstOff -= TySize -
Size;
10644 TLI.isTruncateFree(LargestVT, VT))
10645 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10658 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst,
Offset).getReg(0);
10661 MIB.buildStore(
Value, Ptr, *StoreMMO);
10666 MI.eraseFromParent();
10672 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10674 auto [Dst, Src, Len] =
MI.getFirst3Regs();
10676 const auto *MMOIt =
MI.memoperands_begin();
10678 bool IsVolatile =
MemOp->isVolatile();
10684 "inline memcpy with dynamic size is not yet supported");
10685 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10686 if (KnownLen == 0) {
10687 MI.eraseFromParent();
10691 const auto &DstMMO = **
MI.memoperands_begin();
10692 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10693 Align DstAlign = DstMMO.getBaseAlign();
10694 Align SrcAlign = SrcMMO.getBaseAlign();
10696 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10703 Align SrcAlign,
bool IsVolatile) {
10704 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10705 return lowerMemcpy(
MI, Dst, Src, KnownLen,
10706 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10713 Align SrcAlign,
bool IsVolatile) {
10714 auto &MF = *
MI.getParent()->getParent();
10719 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
10721 bool DstAlignCanChange =
false;
10723 Align Alignment = std::min(DstAlign, SrcAlign);
10727 DstAlignCanChange =
true;
10733 std::vector<LLT> MemOps;
10735 const auto &DstMMO = **
MI.memoperands_begin();
10736 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10742 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10748 if (DstAlignCanChange) {
10751 Align NewAlign =
DL.getABITypeAlign(IRTy);
10756 if (!
TRI->hasStackRealignment(MF))
10758 NewAlign = std::min(NewAlign, *StackAlign);
10760 if (NewAlign > Alignment) {
10761 Alignment = NewAlign;
10769 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
10771 MachineIRBuilder MIB(
MI);
10777 unsigned CurrOffset = 0;
10778 unsigned Size = KnownLen;
10779 for (
auto CopyTy : MemOps) {
10782 if (CopyTy.getSizeInBytes() >
Size)
10783 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
10794 if (CurrOffset != 0) {
10795 LLT SrcTy = MRI.getType(Src);
10798 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
10800 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10804 if (CurrOffset != 0) {
10805 LLT DstTy = MRI.getType(Dst);
10806 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
10808 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10809 CurrOffset += CopyTy.getSizeInBytes();
10810 Size -= CopyTy.getSizeInBytes();
10813 MI.eraseFromParent();
10819 uint64_t KnownLen,
Align DstAlign,
Align SrcAlign,
10821 auto &MF = *
MI.getParent()->getParent();
10826 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
10828 bool DstAlignCanChange =
false;
10831 Align Alignment = std::min(DstAlign, SrcAlign);
10833 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10835 DstAlignCanChange =
true;
10837 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10838 std::vector<LLT> MemOps;
10840 const auto &DstMMO = **
MI.memoperands_begin();
10841 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10842 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10843 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10850 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10856 if (DstAlignCanChange) {
10859 Align NewAlign =
DL.getABITypeAlign(IRTy);
10864 if (!
TRI->hasStackRealignment(MF))
10865 if (MaybeAlign StackAlign =
DL.getStackAlignment())
10866 NewAlign = std::min(NewAlign, *StackAlign);
10868 if (NewAlign > Alignment) {
10869 Alignment = NewAlign;
10877 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
10879 MachineIRBuilder MIB(
MI);
10883 unsigned CurrOffset = 0;
10884 SmallVector<Register, 16> LoadVals;
10885 for (
auto CopyTy : MemOps) {
10892 if (CurrOffset != 0) {
10893 LLT SrcTy = MRI.getType(Src);
10896 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
10898 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10899 CurrOffset += CopyTy.getSizeInBytes();
10903 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
10904 LLT CopyTy = MemOps[
I];
10910 if (CurrOffset != 0) {
10911 LLT DstTy = MRI.getType(Dst);
10914 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
10916 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
10919 MI.eraseFromParent();
10925 const unsigned Opc =
MI.getOpcode();
10928 assert((
Opc == TargetOpcode::G_MEMCPY ||
Opc == TargetOpcode::G_MEMMOVE ||
10929 Opc == TargetOpcode::G_MEMSET) &&
10930 "Expected memcpy like instruction");
10932 auto MMOIt =
MI.memoperands_begin();
10937 auto [Dst, Src, Len] =
MI.getFirst3Regs();
10939 if (
Opc != TargetOpcode::G_MEMSET) {
10940 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
10941 MemOp = *(++MMOIt);
10942 SrcAlign =
MemOp->getBaseAlign();
10947 if (!LenVRegAndVal)
10949 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10951 if (KnownLen == 0) {
10952 MI.eraseFromParent();
10956 if (MaxLen && KnownLen > MaxLen)
10959 bool IsVolatile =
MemOp->isVolatile();
10960 if (
Opc == TargetOpcode::G_MEMCPY) {
10961 auto &MF = *
MI.getParent()->getParent();
10964 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10965 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10968 if (
Opc == TargetOpcode::G_MEMMOVE)
10969 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10970 if (
Opc == TargetOpcode::G_MEMSET)
10971 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents an insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e. the number of whole bytes needed to represent the size.
constexpr LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elements to whatever it takes such that the resulting size is an even multiple of both.
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
@ Custom
The result values require a custom uniformity check.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.