LLVM 23.0.0git
LegalizerHelper.cpp
Go to the documentation of this file.
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy = OrigTy.changeElementCount(
74 ElementCount::getFixed(LeftoverSize / EltSize));
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82

  // Map a scalar LLT to the IR floating-point type of the same bit width;
  // non-scalar types or widths with no FP equivalent yield nullptr.
  // NOTE(review): the declarator line of this definition is missing from
  // this view.
  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    // x87 extended precision.
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}
103
// Constructors: wire the helper to the function's register info and, in the
// first form, derive LegalizerInfo/TargetLowering from the subtarget.
// NOTE(review): the leading declarator lines of both constructors are
// missing from this view.
                                 MachineIRBuilder &Builder,
                                 const LibcallLoweringInfo *Libcalls)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls) {}

// Second form: caller supplies the LegalizerInfo (and VT) explicitly.
                                 const LibcallLoweringInfo *Libcalls,
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls), VT(VT) {}
119
// Perform one legalization step on \p MI: ask the LegalizerInfo which action
// applies and dispatch to the matching helper routine.
// NOTE(review): the leading declarator lines of this definition are missing
// from this view.
                                     LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  // Intrinsics are always handled by target-specific legalization.
  if (isa<GIntrinsic>(MI))
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
    // NOTE(review): the continuation line of this conditional expression
    // appears to be missing from this view.
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
164
165void LegalizerHelper::insertParts(Register DstReg,
166 LLT ResultTy, LLT PartTy,
167 ArrayRef<Register> PartRegs,
168 LLT LeftoverTy,
169 ArrayRef<Register> LeftoverRegs) {
170 if (!LeftoverTy.isValid()) {
171 assert(LeftoverRegs.empty());
172
173 if (!ResultTy.isVector()) {
174 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
175 return;
176 }
177
178 if (PartTy.isVector())
179 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
180 else
181 MIRBuilder.buildBuildVector(DstReg, PartRegs);
182 return;
183 }
184
185 // Merge sub-vectors with different number of elements and insert into DstReg.
186 if (ResultTy.isVector()) {
187 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
188 SmallVector<Register, 8> AllRegs(PartRegs);
189 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
191 }
192
193 SmallVector<Register> GCDRegs;
194 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
195 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
199}
200
// Split the vector value in \p Reg into scalar elements and append them to
// \p Elts.
void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
                                       Register Reg) {
  LLT Ty = MRI.getType(Reg);
  // NOTE(review): the declaration of RegElts is missing from this view.
  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
               MIRBuilder, MRI);
  Elts.append(RegElts);
}
209
/// Merge \p PartRegs with different types into \p DstReg.
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  // NOTE(review): the declaration of AllElts is missing from this view.
  // Scalarize every part except the last into AllElts.
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  // The trailing leftover piece may be a lone scalar or a smaller vector.
  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (!MRI.getType(Leftover).isVector())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
}
225
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
// NOTE(review): the leading declarator line of this definition is missing
// from this view.
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  // All operands except the trailing source are defs.
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}
237
238void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
239 LLT GCDTy, Register SrcReg) {
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
242 // If the source already evenly divides the result type, we don't need to do
243 // anything.
244 Parts.push_back(SrcReg);
245 } else {
246 // Need to split into common type sized pieces.
247 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
248 getUnmergeResults(Parts, *Unmerge);
249 }
250}
251
252LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
253 LLT NarrowTy, Register SrcReg) {
254 LLT SrcTy = MRI.getType(SrcReg);
255 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
256 extractGCDType(Parts, GCDTy, SrcReg);
257 return GCDTy;
258}
259
// Merge the GCDTy-typed pieces in VRegs into NarrowTy-typed values that
// together cover the least common multiple type of DstTy and NarrowTy,
// padding with PadStrategy (G_ZEXT / G_SEXT / G_ANYEXT) once the sources run
// out.  On return VRegs holds the NarrowTy-typed pieces; the LCM type is
// returned.
// NOTE(review): the "SmallVectorImpl<Register> &VRegs," parameter line is
// missing from this view.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
350
351void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
352 ArrayRef<Register> RemergeRegs) {
353 LLT DstTy = MRI.getType(DstReg);
354
355 // Create the merge to the widened source, and extract the relevant bits into
356 // the result.
357
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
360 return;
361 }
362
363 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
364 if (DstTy.isScalar() && LCMTy.isScalar()) {
365 MIRBuilder.buildTrunc(DstReg, Remerge);
366 return;
367 }
368
369 if (LCMTy.isVector()) {
370 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
371 SmallVector<Register, 8> UnmergeDefs(NumDefs);
372 UnmergeDefs[0] = DstReg;
373 for (unsigned I = 1; I != NumDefs; ++I)
374 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
375
376 MIRBuilder.buildUnmerge(UnmergeDefs,
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
378 return;
379 }
380
381 llvm_unreachable("unhandled case");
382}
383
/// Map a generic opcode operating on \p Size-bit scalars to the matching
/// RTLIB::Libcall enumerator.  Aborts on sizes with no libcall flavour.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// Integer libcalls come in 32/64/128-bit flavours.
#define RTLIBCASE_INT(LibcallPrefix) \
  do { \
    switch (Size) { \
    case 32: \
      return RTLIB::LibcallPrefix##32; \
    case 64: \
      return RTLIB::LibcallPrefix##64; \
    case 128: \
      return RTLIB::LibcallPrefix##128; \
    default: \
      llvm_unreachable("unexpected size"); \
    } \
  } while (0)

// Floating-point libcalls additionally have an 80-bit (x87) flavour.
#define RTLIBCASE(LibcallPrefix) \
  do { \
    switch (Size) { \
    case 32: \
      return RTLIB::LibcallPrefix##32; \
    case 64: \
      return RTLIB::LibcallPrefix##64; \
    case 80: \
      return RTLIB::LibcallPrefix##80; \
    case 128: \
      return RTLIB::LibcallPrefix##128; \
    default: \
      llvm_unreachable("unexpected size"); \
    } \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_LROUND:
    RTLIBCASE(LROUND_F);
  case TargetOpcode::G_LLROUND:
    RTLIBCASE(LLROUND_F);
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FEXP10:
    RTLIBCASE(EXP10_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FPOWI:
    RTLIBCASE(POWI_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FTAN:
    RTLIBCASE(TAN_F);
  case TargetOpcode::G_FASIN:
    RTLIBCASE(ASIN_F);
  case TargetOpcode::G_FACOS:
    RTLIBCASE(ACOS_F);
  case TargetOpcode::G_FATAN:
    RTLIBCASE(ATAN_F);
  case TargetOpcode::G_FATAN2:
    RTLIBCASE(ATAN2_F);
  case TargetOpcode::G_FSINH:
    RTLIBCASE(SINH_F);
  case TargetOpcode::G_FCOSH:
    RTLIBCASE(COSH_F);
  case TargetOpcode::G_FTANH:
    RTLIBCASE(TANH_F);
  case TargetOpcode::G_FSINCOS:
    RTLIBCASE(SINCOS_F);
  case TargetOpcode::G_FMODF:
    RTLIBCASE(MODF_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FLDEXP:
    RTLIBCASE(LDEXP_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FMINIMUMNUM:
    RTLIBCASE(FMINIMUM_NUM_F);
  case TargetOpcode::G_FMAXIMUMNUM:
    RTLIBCASE(FMAXIMUM_NUM_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    RTLIBCASE(TRUNC_F);
  case TargetOpcode::G_INTRINSIC_ROUND:
    RTLIBCASE(ROUND_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  case TargetOpcode::G_INTRINSIC_LRINT:
    RTLIBCASE(LRINT_F);
  case TargetOpcode::G_INTRINSIC_LLRINT:
    RTLIBCASE(LLRINT_F);
  }
  llvm_unreachable("Unknown libcall function");
#undef RTLIBCASE_INT
#undef RTLIBCASE
}
519
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
// NOTE(review): the leading declarator lines of this definition are missing
// from this view.
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  // G_MEMCPY %0, %1, %2
  // $x0 = COPY %0
  // RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    if (MI.getOpcode() == TargetOpcode::G_BZERO)
      return false;

    // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
    // memcpy/etc routines return the same parameter. For others it will be the
    // returned value.
    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
585
// Emit a call to the external symbol \p Name, lowering it as a tail call
// when \p MI is provided and is in tail position; in that case the old
// return (and any intervening copies/debug insts) is deleted.
// NOTE(review): the leading declarator lines of this definition are missing
// from this view.
    const char *Name, const CallLowering::ArgInfo &Result,
    LostDebugLocObserver &LocObserver, MachineInstr *MI) const {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  // NOTE(review): the declaration of Info is missing from this view.
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  // Only tail call when the libcall's result is void or exactly the caller's
  // return type, and the call site is in tail position.
  if (MI)
    Info.IsTailCall =
        (Result.Ty->isVoidTy() ||
         Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
        isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
                                *MIRBuilder.getMRI());

  llvm::append_range(Info.OrigArgs, Args);
  if (!CLI.lowerCall(MIRBuilder, Info))
    // NOTE(review): the failure-return line is missing from this view.

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI->getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI->getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }
}
630
// Resolve \p Libcall through the LibcallLoweringInfo and forward to the
// name-based createLibcall overload.
// NOTE(review): the leading declarator lines of this definition are missing
// from this view.
    RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
    MachineInstr *MI) const {
  if (!Libcalls)
    // NOTE(review): the body of this guard is missing from this view.

  RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Libcall);
  if (LibcallImpl == RTLIB::Unsupported)
    // NOTE(review): the body of this guard is missing from this view.

  // NOTE(review): the declaration of Name is missing from this view.
  const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
  return createLibcall(Name.data(), Result, Args, CC, LocObserver, MI);
}
646
// Useful for libcalls where all operands have the same type.
// NOTE(review): the leading declarator lines of this definition, and the
// declaration of Args, are missing from this view.
                                 unsigned Size, Type *OpType,
                                 LostDebugLocObserver &LocObserver) const {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // FIXME: What does the original arg index mean here?
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
    Args.push_back({MO.getReg(), OpType, 0});
  return createLibcall(Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args,
                       LocObserver, &MI);
}
661
// Lower G_FSINCOS via the sincos libcall: pass two stack temporaries as out
// pointers, then load the sin/cos results back into the destination regs.
LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
    MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
    LostDebugLocObserver &LocObserver) {
  MachineFunction &MF = *MI.getMF();
  // NOTE(review): a declaration line is missing from this view here.

  Register DstSin = MI.getOperand(0).getReg();
  Register DstCos = MI.getOperand(1).getReg();
  Register Src = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstSin);

  int MemSize = DstTy.getSizeInBytes();
  Align Alignment = getStackTemporaryAlignment(DstTy);
  // NOTE(review): the declaration of DL is missing from this view.
  unsigned AddrSpace = DL.getAllocaAddrSpace();
  MachinePointerInfo PtrInfo;

  // Stack slots that receive the two results from the libcall.
  Register StackPtrSin =
      createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
          .getReg(0);
  Register StackPtrCos =
      createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
          .getReg(0);

  auto &Ctx = MF.getFunction().getContext();
  auto LibcallResult = createLibcall(
      getRTLibDesc(MI.getOpcode(), Size), {{0}, Type::getVoidTy(Ctx), 0},
      {{Src, OpType, 0},
       {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
       {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
      LocObserver, &MI);

  if (LibcallResult != LegalizeResult::Legalized)
    // NOTE(review): the failure-return line is missing from this view.

  // NOTE(review): the MachineMemOperand creation lines for LoadMMOSin and
  // LoadMMOCos are partially missing from this view.
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);

  MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
  MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
  MI.eraseFromParent();

  // NOTE(review): the final return line is missing from this view.
}
708
// Lower G_FMODF via the modf libcall: the fractional part is the call's
// return value, the integral part is written through a stack out-pointer and
// loaded back afterwards.
// NOTE(review): the return-type line of this definition is missing from this
// view.
LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
                                 unsigned Size, Type *OpType,
                                 LostDebugLocObserver &LocObserver) {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register DstFrac = MI.getOperand(0).getReg();
  Register DstInt = MI.getOperand(1).getReg();
  Register Src = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstFrac);

  int MemSize = DstTy.getSizeInBytes();
  Align Alignment = getStackTemporaryAlignment(DstTy);
  const DataLayout &DL = MIRBuilder.getDataLayout();
  unsigned AddrSpace = DL.getAllocaAddrSpace();
  MachinePointerInfo PtrInfo;

  // Stack slot that receives the integral part from the libcall.
  Register StackPtrInt =
      createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
          .getReg(0);

  auto &Ctx = MF.getFunction().getContext();
  auto LibcallResult = createLibcall(
      getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
      {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
      LocObserver, &MI);

  if (LibcallResult != LegalizeResult::Legalized)
    // NOTE(review): the failure-return line is missing from this view.

  // NOTE(review): the MachineMemOperand creation line for LoadMMOInt is
  // partially missing from this view.
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);

  MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
  MI.eraseFromParent();

  // NOTE(review): the final return line is missing from this view.
}
748
749static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
750 Type *FromType) {
751 auto ToMVT = MVT::getVT(ToType);
752 auto FromMVT = MVT::getVT(FromType);
753
754 switch (Opcode) {
755 case TargetOpcode::G_FPEXT:
756 return RTLIB::getFPEXT(FromMVT, ToMVT);
757 case TargetOpcode::G_FPTRUNC:
758 return RTLIB::getFPROUND(FromMVT, ToMVT);
759 case TargetOpcode::G_FPTOSI:
760 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
761 case TargetOpcode::G_FPTOUI:
762 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
763 case TargetOpcode::G_SITOFP:
764 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
765 case TargetOpcode::G_UITOFP:
766 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
767 }
768 llvm_unreachable("Unsupported libcall function");
769}
770
// Lower a conversion instruction to the matching RTLIB conversion call.
// NOTE(review): the leading declarator line of this definition is missing
// from this view.
    MachineInstr &MI, Type *ToType, Type *FromType,
    LostDebugLocObserver &LocObserver, bool IsSigned) const {
  CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
  // Integer arguments need the ABI-mandated extension attribute.
  if (FromType->isIntegerTy()) {
    if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
      Arg.Flags[0].setSExt();
    else
      Arg.Flags[0].setZExt();
  }

  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(Libcall, {MI.getOperand(0).getReg(), ToType, 0}, Arg,
                       LocObserver, &MI);
}
786
// Lower G_BZERO/G_MEMCPY/G_MEMMOVE/G_MEMSET to the corresponding memory
// libcall, tail-calling when the instruction's tail-imm is set and the call
// is in tail position.
// NOTE(review): the leading declarator lines of this definition, and the
// declaration of Args, are missing from this view.
    LostDebugLocObserver &LocObserver) const {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    // These routines return their destination pointer argument.
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }

  if (!Libcalls) // FIXME: Should be mandatory
    // NOTE(review): the body of this guard is missing from this view.

  RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);

  // Unsupported libcall on the target.
  if (RTLibcallImpl == RTLIB::Unsupported) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    // NOTE(review): the return line of this guard is missing from this view.
  }

  // NOTE(review): the declaration of Info is missing from this view.
  Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);

  // NOTE(review): the initializer of LibcallName is missing from this view.
  StringRef LibcallName =
  Info.Callee = MachineOperand::CreateES(LibcallName.data());
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall =
      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
      isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);

  llvm::append_range(Info.OrigArgs, Args);
  if (!CLI.lowerCall(MIRBuilder, Info))
    // NOTE(review): the failure-return line is missing from this view.

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  // NOTE(review): the final return line is missing from this view.
}
881
/// Select the outline-atomic libcall for \p MI, indexed by memory size
/// (1/2/4/8/16 bytes) and the merged atomic ordering. Returns
/// UNKNOWN_LIBCALL for vector memory types or unsupported opcodes.
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto &AtomicMI = cast<GMemOperation>(MI);
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();
  uint64_t MemSize = MemType.getSizeInBytes();
  if (MemType.isVector())
    return RTLIB::UNKNOWN_LIBCALL;

// LCALLS expands the four ordering variants; LCALL5 the five size variants.
#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A) \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
#undef LCALLS
#undef LCALL5
}
928
// Lower an atomic instruction to the matching outline-atomic libcall,
// rewriting AND to LDCLR (via XOR with -1) and SUB to LDADD (via negation).
// NOTE(review): the leading declarator lines of this definition, and the
// declarations of Args/Success, are missing from this view.
  auto &Ctx = MIRBuilder.getContext();

  Type *RetTy;
  SmallVector<Register> RetRegs;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    LLT SuccessLLT;
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    // The WITH_SUCCESS form additionally yields a success flag, so the
    // libcall returns a two-member struct.
    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();
      RetRegs.push_back(Success);
      RetTy = StructType::get(
          Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
    }
    Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
    Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    // LDCLR clears bits, so AND's operand must be inverted; LDADD adds, so
    // SUB's operand must be negated.
    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
      Val =
          MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
              .getReg(0);
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
      Val =
          MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
              .getReg(0);
    Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  default:
    llvm_unreachable("unsupported opcode");
  }

  if (!Libcalls) // FIXME: Should be mandatory
    // NOTE(review): the body of this guard is missing from this view.

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
  RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);

  // Unsupported libcall on the target.
  if (RTLibcallImpl == RTLIB::Unsupported) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    // NOTE(review): the return line of this guard is missing from this view.
  }

  // NOTE(review): the declaration of Info is missing from this view.
  Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);

  // NOTE(review): the initializer of LibcallName is missing from this view.
  StringRef LibcallName =
  Info.Callee = MachineOperand::CreateES(LibcallName.data());
  Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);

  llvm::append_range(Info.OrigArgs, Args);
  if (!CLI.lowerCall(MIRBuilder, Info))
    // NOTE(review): the failure-return and final return lines are missing
    // from this view.
}
1011
// Map FP-environment/FP-mode opcodes to the corresponding fegetenv /
// fesetenv / fegetmode / fesetmode libcall.
// NOTE(review): the declarator line of this definition and the start of the
// RTLibcall declaration are missing from this view.
static RTLIB::Libcall
  RTLibcall;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_GET_FPENV:
    RTLibcall = RTLIB::FEGETENV;
    break;
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_RESET_FPENV:
    RTLibcall = RTLIB::FESETENV;
    break;
  case TargetOpcode::G_GET_FPMODE:
    RTLibcall = RTLIB::FEGETMODE;
    break;
  case TargetOpcode::G_SET_FPMODE:
  case TargetOpcode::G_RESET_FPMODE:
    RTLibcall = RTLIB::FESETMODE;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return RTLibcall;
}
1035
// Some library functions that read FP state (fegetmode, fegetenv) write the
// state into a region in memory. IR intrinsics that do the same operations
// (get_fpmode, get_fpenv) return the state as integer value. To implement
// these intrinsics via the library functions, we need to use a temporary
// variable, for example:
//
//     %0:_(s32) = G_GET_FPMODE
//
// is transformed to:
//
//     %1:_(p0) = G_FRAME_INDEX %stack.0
//     BL &fegetmode
//     %0:_(s32) = G_LOAD %1
//
1051LegalizerHelper::createGetStateLibcall(MachineInstr &MI,
1052 LostDebugLocObserver &LocObserver) {
1053 const DataLayout &DL = MIRBuilder.getDataLayout();
1054 auto &MF = MIRBuilder.getMF();
1055 auto &MRI = *MIRBuilder.getMRI();
1056 auto &Ctx = MF.getFunction().getContext();
1057
1058 // Create temporary, where library function will put the read state.
1059 Register Dst = MI.getOperand(0).getReg();
1060 LLT StateTy = MRI.getType(Dst);
1061 TypeSize StateSize = StateTy.getSizeInBytes();
1062 Align TempAlign = getStackTemporaryAlignment(StateTy);
1063 MachinePointerInfo TempPtrInfo;
1064 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1065
1066 // Create a call to library function, with the temporary as an argument.
1067 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1068 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1069 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1070 auto Res = createLibcall(
1071 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1072 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1073 nullptr);
1074 if (Res != LegalizerHelper::Legalized)
1075 return Res;
1076
1077 // Create a load from the temporary.
1078 MachineMemOperand *MMO = MF.getMachineMemOperand(
1079 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1080 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1081
1083}
1084
// Similar to `createGetStateLibcall`, this function calls a library function
// using transient space on the stack. In this case the library function reads
// the content of the memory region.
1089LegalizerHelper::createSetStateLibcall(MachineInstr &MI,
1090 LostDebugLocObserver &LocObserver) {
1091 const DataLayout &DL = MIRBuilder.getDataLayout();
1092 auto &MF = MIRBuilder.getMF();
1093 auto &MRI = *MIRBuilder.getMRI();
1094 auto &Ctx = MF.getFunction().getContext();
1095
1096 // Create temporary, where library function will get the new state.
1097 Register Src = MI.getOperand(0).getReg();
1098 LLT StateTy = MRI.getType(Src);
1099 TypeSize StateSize = StateTy.getSizeInBytes();
1100 Align TempAlign = getStackTemporaryAlignment(StateTy);
1101 MachinePointerInfo TempPtrInfo;
1102 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1103
1104 // Put the new state into the temporary.
1105 MachineMemOperand *MMO = MF.getMachineMemOperand(
1106 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1107 MIRBuilder.buildStore(Src, Temp, *MMO);
1108
1109 // Create a call to library function, with the temporary as an argument.
1110 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1111 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1112 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1113 return createLibcall(RTLibcall,
1114 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver, nullptr);
1117}
1118
/// Returns the corresponding libcall for the given Pred, and
/// the ICMP predicate that should be generated to compare the libcall's
/// result with #0.
1122static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1124#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1125 do { \
1126 switch (Size) { \
1127 case 32: \
1128 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1129 case 64: \
1130 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1131 case 128: \
1132 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1133 default: \
1134 llvm_unreachable("unexpected size"); \
1135 } \
1136 } while (0)
1137
1138 switch (Pred) {
1139 case CmpInst::FCMP_OEQ:
1141 case CmpInst::FCMP_UNE:
1143 case CmpInst::FCMP_OGE:
1145 case CmpInst::FCMP_OLT:
1147 case CmpInst::FCMP_OLE:
1149 case CmpInst::FCMP_OGT:
1151 case CmpInst::FCMP_UNO:
1153 default:
1154 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1155 }
1156}
1157
1159LegalizerHelper::createFCMPLibcall(MachineInstr &MI,
1160 LostDebugLocObserver &LocObserver) {
1161 auto &MF = MIRBuilder.getMF();
1162 auto &Ctx = MF.getFunction().getContext();
1163 const GFCmp *Cmp = cast<GFCmp>(&MI);
1164
1165 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1166 unsigned Size = OpLLT.getSizeInBits();
1167 if ((Size != 32 && Size != 64 && Size != 128) ||
1168 OpLLT != MRI.getType(Cmp->getRHSReg()))
1169 return UnableToLegalize;
1170
1171 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1172
1173 // DstReg type is s32
1174 const Register DstReg = Cmp->getReg(0);
1175 LLT DstTy = MRI.getType(DstReg);
1176 const auto Cond = Cmp->getCond();
1177
1178 // Reference:
1179 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1180 // Generates a libcall followed by ICMP.
1181 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1182 const CmpInst::Predicate ICmpPred,
1183 const DstOp &Res) -> Register {
1184 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1185 constexpr LLT TempLLT = LLT::scalar(32);
1186 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1187 // Generate libcall, holding result in Temp
1188 const auto Status = createLibcall(
1189 Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1190 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1191 LocObserver, &MI);
1192 if (!Status)
1193 return {};
1194
1195 // Compare temp with #0 to get the final result.
1196 return MIRBuilder
1197 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1198 .getReg(0);
1199 };
1200
1201 // Simple case if we have a direct mapping from predicate to libcall
1202 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1203 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1204 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1205 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1206 return Legalized;
1207 }
1208 return UnableToLegalize;
1209 }
1210
1211 // No direct mapping found, should be generated as combination of libcalls.
1212
1213 switch (Cond) {
1214 case CmpInst::FCMP_UEQ: {
1215 // FCMP_UEQ: unordered or equal
1216 // Convert into (FCMP_OEQ || FCMP_UNO).
1217
1218 const auto [OeqLibcall, OeqPred] =
1220 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1221
1222 const auto [UnoLibcall, UnoPred] =
1224 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1225 if (Oeq && Uno)
1226 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1227 else
1228 return UnableToLegalize;
1229
1230 break;
1231 }
1232 case CmpInst::FCMP_ONE: {
1233 // FCMP_ONE: ordered and operands are unequal
1234 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1235
1236 // We inverse the predicate instead of generating a NOT
1237 // to save one instruction.
1238 // On AArch64 isel can even select two cmp into a single ccmp.
1239 const auto [OeqLibcall, OeqPred] =
1241 const auto NotOeq =
1242 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1243
1244 const auto [UnoLibcall, UnoPred] =
1246 const auto NotUno =
1247 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1248
1249 if (NotOeq && NotUno)
1250 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1251 else
1252 return UnableToLegalize;
1253
1254 break;
1255 }
1256 case CmpInst::FCMP_ULT:
1257 case CmpInst::FCMP_UGE:
1258 case CmpInst::FCMP_UGT:
1259 case CmpInst::FCMP_ULE:
1260 case CmpInst::FCMP_ORD: {
1261 // Convert into: !(inverse(Pred))
1262 // E.g. FCMP_ULT becomes !FCMP_OGE
1263 // This is equivalent to the following, but saves some instructions.
1264 // MIRBuilder.buildNot(
1265 // PredTy,
1266 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1267 // Op1, Op2));
1268 const auto [InversedLibcall, InversedPred] =
1270 if (!BuildLibcall(InversedLibcall,
1271 CmpInst::getInversePredicate(InversedPred), DstReg))
1272 return UnableToLegalize;
1273 break;
1274 }
1275 default:
1276 return UnableToLegalize;
1277 }
1278
1279 return Legalized;
1280}
1281
// The function is used to legalize operations that set the default
// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
// used for that. On most targets supported in glibc, FE_DFL_MODE is defined
// as `((const femode_t *) -1)`. That assumption is used here. If for some
// target it is not true, the target must provide custom lowering.
1288LegalizerHelper::createResetStateLibcall(MachineInstr &MI,
1289 LostDebugLocObserver &LocObserver) {
1290 const DataLayout &DL = MIRBuilder.getDataLayout();
1291 auto &MF = MIRBuilder.getMF();
1292 auto &Ctx = MF.getFunction().getContext();
1293
1294 // Create an argument for the library function.
1295 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1296 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1297 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1298 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1299 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1301 MIRBuilder.buildIntToPtr(Dest, DefValue);
1302
1303 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1304 return createLibcall(
1305 RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &MI);
1307}
1308
1311 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1312
1313 switch (MI.getOpcode()) {
1314 default:
1315 return UnableToLegalize;
1316 case TargetOpcode::G_MUL:
1317 case TargetOpcode::G_SDIV:
1318 case TargetOpcode::G_UDIV:
1319 case TargetOpcode::G_SREM:
1320 case TargetOpcode::G_UREM:
1321 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1322 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1323 unsigned Size = LLTy.getSizeInBits();
1324 Type *HLTy = IntegerType::get(Ctx, Size);
1325 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1326 if (Status != Legalized)
1327 return Status;
1328 break;
1329 }
1330 case TargetOpcode::G_FADD:
1331 case TargetOpcode::G_FSUB:
1332 case TargetOpcode::G_FMUL:
1333 case TargetOpcode::G_FDIV:
1334 case TargetOpcode::G_FMA:
1335 case TargetOpcode::G_FPOW:
1336 case TargetOpcode::G_FREM:
1337 case TargetOpcode::G_FCOS:
1338 case TargetOpcode::G_FSIN:
1339 case TargetOpcode::G_FTAN:
1340 case TargetOpcode::G_FACOS:
1341 case TargetOpcode::G_FASIN:
1342 case TargetOpcode::G_FATAN:
1343 case TargetOpcode::G_FATAN2:
1344 case TargetOpcode::G_FCOSH:
1345 case TargetOpcode::G_FSINH:
1346 case TargetOpcode::G_FTANH:
1347 case TargetOpcode::G_FLOG10:
1348 case TargetOpcode::G_FLOG:
1349 case TargetOpcode::G_FLOG2:
1350 case TargetOpcode::G_FEXP:
1351 case TargetOpcode::G_FEXP2:
1352 case TargetOpcode::G_FEXP10:
1353 case TargetOpcode::G_FCEIL:
1354 case TargetOpcode::G_FFLOOR:
1355 case TargetOpcode::G_FMINNUM:
1356 case TargetOpcode::G_FMAXNUM:
1357 case TargetOpcode::G_FMINIMUMNUM:
1358 case TargetOpcode::G_FMAXIMUMNUM:
1359 case TargetOpcode::G_FSQRT:
1360 case TargetOpcode::G_FRINT:
1361 case TargetOpcode::G_FNEARBYINT:
1362 case TargetOpcode::G_INTRINSIC_TRUNC:
1363 case TargetOpcode::G_INTRINSIC_ROUND:
1364 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1365 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1366 unsigned Size = LLTy.getSizeInBits();
1367 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1368 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1369 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1370 return UnableToLegalize;
1371 }
1372 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1373 if (Status != Legalized)
1374 return Status;
1375 break;
1376 }
1377 case TargetOpcode::G_FSINCOS: {
1378 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1379 unsigned Size = LLTy.getSizeInBits();
1380 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1381 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1382 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1383 return UnableToLegalize;
1384 }
1385 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1386 }
1387 case TargetOpcode::G_FMODF: {
1388 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1389 unsigned Size = LLTy.getSizeInBits();
1390 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1391 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1392 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1393 return UnableToLegalize;
1394 }
1395 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1396 }
1397 case TargetOpcode::G_LROUND:
1398 case TargetOpcode::G_LLROUND:
1399 case TargetOpcode::G_INTRINSIC_LRINT:
1400 case TargetOpcode::G_INTRINSIC_LLRINT: {
1401 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1402 unsigned Size = LLTy.getSizeInBits();
1403 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1404 Type *ITy = IntegerType::get(
1405 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1406 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1407 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1408 return UnableToLegalize;
1409 }
1410 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1412 createLibcall(Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1413 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1414 if (Status != Legalized)
1415 return Status;
1416 MI.eraseFromParent();
1417 return Legalized;
1418 }
1419 case TargetOpcode::G_FPOWI:
1420 case TargetOpcode::G_FLDEXP: {
1421 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1422 unsigned Size = LLTy.getSizeInBits();
1423 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1424 Type *ITy = IntegerType::get(
1425 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1426 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1427 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1428 return UnableToLegalize;
1429 }
1430 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1432 {MI.getOperand(1).getReg(), HLTy, 0},
1433 {MI.getOperand(2).getReg(), ITy, 1}};
1434 Args[1].Flags[0].setSExt();
1436 Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &MI);
1437 if (Status != Legalized)
1438 return Status;
1439 break;
1440 }
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1443 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1444 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1445 if (!FromTy || !ToTy)
1446 return UnableToLegalize;
1447 LegalizeResult Status = conversionLibcall(MI, ToTy, FromTy, LocObserver);
1448 if (Status != Legalized)
1449 return Status;
1450 break;
1451 }
1452 case TargetOpcode::G_FCMP: {
1453 LegalizeResult Status = createFCMPLibcall(MI, LocObserver);
1454 if (Status != Legalized)
1455 return Status;
1456 MI.eraseFromParent();
1457 return Status;
1458 }
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1461 // FIXME: Support other types
1462 Type *FromTy =
1463 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1464 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1466 return UnableToLegalize;
1468 FromTy, LocObserver);
1469 if (Status != Legalized)
1470 return Status;
1471 break;
1472 }
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1476 Type *ToTy =
1477 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1479 return UnableToLegalize;
1480 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1482 MI, ToTy, Type::getIntNTy(Ctx, FromSize), LocObserver, IsSigned);
1483 if (Status != Legalized)
1484 return Status;
1485 break;
1486 }
1487 case TargetOpcode::G_ATOMICRMW_XCHG:
1488 case TargetOpcode::G_ATOMICRMW_ADD:
1489 case TargetOpcode::G_ATOMICRMW_SUB:
1490 case TargetOpcode::G_ATOMICRMW_AND:
1491 case TargetOpcode::G_ATOMICRMW_OR:
1492 case TargetOpcode::G_ATOMICRMW_XOR:
1493 case TargetOpcode::G_ATOMIC_CMPXCHG:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1496 if (Status != Legalized)
1497 return Status;
1498 break;
1499 }
1500 case TargetOpcode::G_BZERO:
1501 case TargetOpcode::G_MEMCPY:
1502 case TargetOpcode::G_MEMMOVE:
1503 case TargetOpcode::G_MEMSET: {
1504 LegalizeResult Result =
1505 createMemLibcall(*MIRBuilder.getMRI(), MI, LocObserver);
1506 if (Result != Legalized)
1507 return Result;
1508 MI.eraseFromParent();
1509 return Result;
1510 }
1511 case TargetOpcode::G_GET_FPENV:
1512 case TargetOpcode::G_GET_FPMODE: {
1513 LegalizeResult Result = createGetStateLibcall(MI, LocObserver);
1514 if (Result != Legalized)
1515 return Result;
1516 break;
1517 }
1518 case TargetOpcode::G_SET_FPENV:
1519 case TargetOpcode::G_SET_FPMODE: {
1520 LegalizeResult Result = createSetStateLibcall(MI, LocObserver);
1521 if (Result != Legalized)
1522 return Result;
1523 break;
1524 }
1525 case TargetOpcode::G_RESET_FPENV:
1526 case TargetOpcode::G_RESET_FPMODE: {
1527 LegalizeResult Result = createResetStateLibcall(MI, LocObserver);
1528 if (Result != Legalized)
1529 return Result;
1530 break;
1531 }
1532 }
1533
1534 MI.eraseFromParent();
1535 return Legalized;
1536}
1537
1539 unsigned TypeIdx,
1540 LLT NarrowTy) {
1541 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1542 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1543
1544 switch (MI.getOpcode()) {
1545 default:
1546 return UnableToLegalize;
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1548 Register DstReg = MI.getOperand(0).getReg();
1549 LLT DstTy = MRI.getType(DstReg);
1550
1551 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1552 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1553 // FIXME: Although this would also be legal for the general case, it causes
1554 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1555 // combines not being hit). This seems to be a problem related to the
1556 // artifact combiner.
1557 if (SizeOp0 % NarrowSize != 0) {
1558 LLT ImplicitTy = DstTy.changeElementType(NarrowTy);
1559 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1560 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1561
1562 MI.eraseFromParent();
1563 return Legalized;
1564 }
1565
1566 int NumParts = SizeOp0 / NarrowSize;
1567
1569 for (int i = 0; i < NumParts; ++i)
1570 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1571
1572 if (DstTy.isVector())
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1574 else
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1577 return Legalized;
1578 }
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1581 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1583 unsigned NarrowSize = NarrowTy.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1585
1586 SmallVector<Register, 4> PartRegs;
1587 for (int I = 0; I != NumParts; ++I) {
1588 unsigned Offset = I * NarrowSize;
1589 auto K = MIRBuilder.buildConstant(NarrowTy,
1590 Val.lshr(Offset).trunc(NarrowSize));
1591 PartRegs.push_back(K.getReg(0));
1592 }
1593
1594 LLT LeftoverTy;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1596 SmallVector<Register, 1> LeftoverRegs;
1597 if (LeftoverBits != 0) {
1598 LeftoverTy = LLT::scalar(LeftoverBits);
1599 auto K = MIRBuilder.buildConstant(
1600 LeftoverTy,
1601 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1602 LeftoverRegs.push_back(K.getReg(0));
1603 }
1604
1605 insertParts(MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1607
1608 MI.eraseFromParent();
1609 return Legalized;
1610 }
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1614 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1615 case TargetOpcode::G_TRUNC: {
1616 if (TypeIdx != 1)
1617 return UnableToLegalize;
1618
1619 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1620 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1621 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1622 return UnableToLegalize;
1623 }
1624
1625 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1626 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1628 return Legalized;
1629 }
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1632 if (TypeIdx != 0)
1633 return UnableToLegalize;
1634
1635 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1636 // Should widen scalar first
1637 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1638 return UnableToLegalize;
1639
1640 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1642 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1643 Parts.push_back(
1644 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1645 .getReg(0));
1646 }
1647
1648 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1650 return Legalized;
1651 }
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1662 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1665 return narrowScalarMul(MI, NarrowTy);
1666 case TargetOpcode::G_EXTRACT:
1667 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1668 case TargetOpcode::G_INSERT:
1669 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1670 case TargetOpcode::G_LOAD: {
1671 auto &LoadMI = cast<GLoad>(MI);
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1674 if (DstTy.isVector())
1675 return UnableToLegalize;
1676
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1680 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1681 LoadMI.eraseFromParent();
1682 return Legalized;
1683 }
1684
1685 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1686 }
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD: {
1689 auto &LoadMI = cast<GExtLoad>(MI);
1690 Register DstReg = LoadMI.getDstReg();
1691 Register PtrReg = LoadMI.getPointerReg();
1692
1693 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1694 auto &MMO = LoadMI.getMMO();
1695 unsigned MemSize = MMO.getSizeInBits().getValue();
1696
1697 if (MemSize == NarrowSize) {
1698 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1699 } else if (MemSize < NarrowSize) {
1700 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1701 } else if (MemSize > NarrowSize) {
1702 // FIXME: Need to split the load.
1703 return UnableToLegalize;
1704 }
1705
1706 if (isa<GZExtLoad>(LoadMI))
1707 MIRBuilder.buildZExt(DstReg, TmpReg);
1708 else
1709 MIRBuilder.buildSExt(DstReg, TmpReg);
1710
1711 LoadMI.eraseFromParent();
1712 return Legalized;
1713 }
1714 case TargetOpcode::G_STORE: {
1715 auto &StoreMI = cast<GStore>(MI);
1716
1717 Register SrcReg = StoreMI.getValueReg();
1718 LLT SrcTy = MRI.getType(SrcReg);
1719 if (SrcTy.isVector())
1720 return UnableToLegalize;
1721
1722 int NumParts = SizeOp0 / NarrowSize;
1723 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1724 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1725 if (SrcTy.isVector() && LeftoverBits != 0)
1726 return UnableToLegalize;
1727
1728 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1729 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1730 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1731 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1732 StoreMI.eraseFromParent();
1733 return Legalized;
1734 }
1735
1736 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1737 }
1738 case TargetOpcode::G_SELECT:
1739 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1740 case TargetOpcode::G_AND:
1741 case TargetOpcode::G_OR:
1742 case TargetOpcode::G_XOR: {
1743 // Legalize bitwise operation:
1744 // A = BinOp<Ty> B, C
1745 // into:
1746 // B1, ..., BN = G_UNMERGE_VALUES B
1747 // C1, ..., CN = G_UNMERGE_VALUES C
1748 // A1 = BinOp<Ty/N> B1, C2
1749 // ...
1750 // AN = BinOp<Ty/N> BN, CN
1751 // A = G_MERGE_VALUES A1, ..., AN
1752 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1753 }
1754 case TargetOpcode::G_SHL:
1755 case TargetOpcode::G_LSHR:
1756 case TargetOpcode::G_ASHR:
1757 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1758 case TargetOpcode::G_CTLZ:
1759 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1760 case TargetOpcode::G_CTTZ:
1761 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1762 case TargetOpcode::G_CTLS:
1763 case TargetOpcode::G_CTPOP:
1764 if (TypeIdx == 1)
1765 switch (MI.getOpcode()) {
1766 case TargetOpcode::G_CTLZ:
1767 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1768 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1769 case TargetOpcode::G_CTTZ:
1770 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1771 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1772 case TargetOpcode::G_CTPOP:
1773 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1774 case TargetOpcode::G_CTLS:
1775 return narrowScalarCTLS(MI, TypeIdx, NarrowTy);
1776 default:
1777 return UnableToLegalize;
1778 }
1779
1780 Observer.changingInstr(MI);
1781 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1782 Observer.changedInstr(MI);
1783 return Legalized;
1784 case TargetOpcode::G_INTTOPTR:
1785 if (TypeIdx != 1)
1786 return UnableToLegalize;
1787
1788 Observer.changingInstr(MI);
1789 narrowScalarSrc(MI, NarrowTy, 1);
1790 Observer.changedInstr(MI);
1791 return Legalized;
1792 case TargetOpcode::G_PTRTOINT:
1793 if (TypeIdx != 0)
1794 return UnableToLegalize;
1795
1796 Observer.changingInstr(MI);
1797 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1798 Observer.changedInstr(MI);
1799 return Legalized;
1800 case TargetOpcode::G_PHI: {
1801 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1802 // NarrowSize.
1803 if (SizeOp0 % NarrowSize != 0)
1804 return UnableToLegalize;
1805
1806 unsigned NumParts = SizeOp0 / NarrowSize;
1807 SmallVector<Register, 2> DstRegs(NumParts);
1808 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1809 Observer.changingInstr(MI);
1810 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1811 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1812 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1813 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1814 SrcRegs[i / 2], MIRBuilder, MRI);
1815 }
1816 MachineBasicBlock &MBB = *MI.getParent();
1817 MIRBuilder.setInsertPt(MBB, MI);
1818 for (unsigned i = 0; i < NumParts; ++i) {
1819 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1821 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1822 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1823 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1824 }
1825 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1826 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1827 Observer.changedInstr(MI);
1828 MI.eraseFromParent();
1829 return Legalized;
1830 }
1831 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1832 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1833 if (TypeIdx != 2)
1834 return UnableToLegalize;
1835
1836 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1837 Observer.changingInstr(MI);
1838 narrowScalarSrc(MI, NarrowTy, OpIdx);
1839 Observer.changedInstr(MI);
1840 return Legalized;
1841 }
1842 case TargetOpcode::G_ICMP: {
1843 Register LHS = MI.getOperand(2).getReg();
1844 LLT SrcTy = MRI.getType(LHS);
1845 CmpInst::Predicate Pred =
1846 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1847
1848 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1849 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1850 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1851 LHSLeftoverRegs, MIRBuilder, MRI))
1852 return UnableToLegalize;
1853
1854 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1855 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1856 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1857 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1858 return UnableToLegalize;
1859
1860 // We now have the LHS and RHS of the compare split into narrow-type
1861 // registers, plus potentially some leftover type.
1862 Register Dst = MI.getOperand(0).getReg();
1863 LLT ResTy = MRI.getType(Dst);
1864 if (ICmpInst::isEquality(Pred)) {
1865 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1866 // them together. For each equal part, the result should be all 0s. For
1867 // each non-equal part, we'll get at least one 1.
1868 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1870 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1871 auto LHS = std::get<0>(LHSAndRHS);
1872 auto RHS = std::get<1>(LHSAndRHS);
1873 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1874 Xors.push_back(Xor);
1875 }
1876
1877 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1878 // to the desired narrow type so that we can OR them together later.
1879 SmallVector<Register, 4> WidenedXors;
1880 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1881 auto LHS = std::get<0>(LHSAndRHS);
1882 auto RHS = std::get<1>(LHSAndRHS);
1883 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1884 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1885 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1886 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1887 llvm::append_range(Xors, WidenedXors);
1888 }
1889
1890 // Now, for each part we broke up, we know if they are equal/not equal
1891 // based off the G_XOR. We can OR these all together and compare against
1892 // 0 to get the result.
1893 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1894 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1895 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1896 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1897 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1898 } else {
1899 Register CmpIn;
1900 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1901 Register CmpOut;
1902 CmpInst::Predicate PartPred;
1903
1904 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1905 PartPred = Pred;
1906 CmpOut = Dst;
1907 } else {
1908 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1909 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1910 }
1911
1912 if (!CmpIn) {
1913 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1914 RHSPartRegs[I]);
1915 } else {
1916 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1917 RHSPartRegs[I]);
1918 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1919 LHSPartRegs[I], RHSPartRegs[I]);
1920 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1921 }
1922
1923 CmpIn = CmpOut;
1924 }
1925
1926 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1927 Register CmpOut;
1928 CmpInst::Predicate PartPred;
1929
1930 if (I == E - 1) {
1931 PartPred = Pred;
1932 CmpOut = Dst;
1933 } else {
1934 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1936 }
1937
1938 if (!CmpIn) {
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1940 RHSLeftoverRegs[I]);
1941 } else {
1942 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1943 RHSLeftoverRegs[I]);
1944 auto CmpEq =
1945 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1946 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1947 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1948 }
1949
1950 CmpIn = CmpOut;
1951 }
1952 }
1953 MI.eraseFromParent();
1954 return Legalized;
1955 }
1956 case TargetOpcode::G_FCMP:
1957 if (TypeIdx != 0)
1958 return UnableToLegalize;
1959
1960 Observer.changingInstr(MI);
1961 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1962 Observer.changedInstr(MI);
1963 return Legalized;
1964
1965 case TargetOpcode::G_SEXT_INREG: {
1966 if (TypeIdx != 0)
1967 return UnableToLegalize;
1968
1969 int64_t SizeInBits = MI.getOperand(2).getImm();
1970
1971 // So long as the new type has more bits than the bits we're extending we
1972 // don't need to break it apart.
1973 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1974 Observer.changingInstr(MI);
1975 // We don't lose any non-extension bits by truncating the src and
1976 // sign-extending the dst.
1977 MachineOperand &MO1 = MI.getOperand(1);
1978 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1979 MO1.setReg(TruncMIB.getReg(0));
1980
1981 MachineOperand &MO2 = MI.getOperand(0);
1982 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1983 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1984 MIRBuilder.buildSExt(MO2, DstExt);
1985 MO2.setReg(DstExt);
1986 Observer.changedInstr(MI);
1987 return Legalized;
1988 }
1989
1990 // Break it apart. Components below the extension point are unmodified. The
1991 // component containing the extension point becomes a narrower SEXT_INREG.
1992 // Components above it are ashr'd from the component containing the
1993 // extension point.
1994 if (SizeOp0 % NarrowSize != 0)
1995 return UnableToLegalize;
1996 int NumParts = SizeOp0 / NarrowSize;
1997
1998 // List the registers where the destination will be scattered.
2000 // List the registers where the source will be split.
2002
2003 // Create all the temporary registers.
2004 for (int i = 0; i < NumParts; ++i) {
2005 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2006
2007 SrcRegs.push_back(SrcReg);
2008 }
2009
2010 // Explode the big arguments into smaller chunks.
2011 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2012
2013 Register AshrCstReg =
2014 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2015 .getReg(0);
2016 Register FullExtensionReg;
2017 Register PartialExtensionReg;
2018
2019 // Do the operation on each small part.
2020 for (int i = 0; i < NumParts; ++i) {
2021 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2022 DstRegs.push_back(SrcRegs[i]);
2023 PartialExtensionReg = DstRegs.back();
2024 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2025 assert(PartialExtensionReg &&
2026 "Expected to visit partial extension before full");
2027 if (FullExtensionReg) {
2028 DstRegs.push_back(FullExtensionReg);
2029 continue;
2030 }
2031 DstRegs.push_back(
2032 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2033 .getReg(0));
2034 FullExtensionReg = DstRegs.back();
2035 } else {
2036 DstRegs.push_back(
2038 .buildInstr(
2039 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2040 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2041 .getReg(0));
2042 PartialExtensionReg = DstRegs.back();
2043 }
2044 }
2045
2046 // Gather the destination registers into the final destination.
2047 Register DstReg = MI.getOperand(0).getReg();
2048 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2049 MI.eraseFromParent();
2050 return Legalized;
2051 }
2052 case TargetOpcode::G_BSWAP:
2053 case TargetOpcode::G_BITREVERSE: {
2054 if (SizeOp0 % NarrowSize != 0)
2055 return UnableToLegalize;
2056
2057 Observer.changingInstr(MI);
2058 SmallVector<Register, 2> SrcRegs, DstRegs;
2059 unsigned NumParts = SizeOp0 / NarrowSize;
2060 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2061 MIRBuilder, MRI);
2062
2063 for (unsigned i = 0; i < NumParts; ++i) {
2064 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2065 {SrcRegs[NumParts - 1 - i]});
2066 DstRegs.push_back(DstPart.getReg(0));
2067 }
2068
2069 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2070
2071 Observer.changedInstr(MI);
2072 MI.eraseFromParent();
2073 return Legalized;
2074 }
2075 case TargetOpcode::G_PTR_ADD:
2076 case TargetOpcode::G_PTRMASK: {
2077 if (TypeIdx != 1)
2078 return UnableToLegalize;
2079 Observer.changingInstr(MI);
2080 narrowScalarSrc(MI, NarrowTy, 2);
2081 Observer.changedInstr(MI);
2082 return Legalized;
2083 }
2084 case TargetOpcode::G_FPTOUI:
2085 case TargetOpcode::G_FPTOSI:
2086 case TargetOpcode::G_FPTOUI_SAT:
2087 case TargetOpcode::G_FPTOSI_SAT:
2088 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2089 case TargetOpcode::G_FPEXT:
2090 if (TypeIdx != 0)
2091 return UnableToLegalize;
2092 Observer.changingInstr(MI);
2093 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2094 Observer.changedInstr(MI);
2095 return Legalized;
2096 case TargetOpcode::G_FLDEXP:
2097 case TargetOpcode::G_STRICT_FLDEXP:
2098 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2099 case TargetOpcode::G_VSCALE: {
2100 Register Dst = MI.getOperand(0).getReg();
2101 LLT Ty = MRI.getType(Dst);
2102
2103 // Assume VSCALE(1) fits into a legal integer
2104 const APInt One(NarrowTy.getSizeInBits(), 1);
2105 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2106 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2107 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2108 MIRBuilder.buildMul(Dst, ZExt, C);
2109
2110 MI.eraseFromParent();
2111 return Legalized;
2112 }
2113 }
2114}
2115
2117 LLT Ty = MRI.getType(Val);
2118 if (Ty.isScalar())
2119 return Val;
2120
2121 const DataLayout &DL = MIRBuilder.getDataLayout();
2122 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2123 if (Ty.isPointer()) {
2124 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2125 return Register();
2126 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2127 }
2128
2129 Register NewVal = Val;
2130
2131 assert(Ty.isVector());
2132 if (Ty.isPointerVector())
2133 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2134 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2135}
2136
2138 unsigned OpIdx, unsigned ExtOpcode) {
2139 MachineOperand &MO = MI.getOperand(OpIdx);
2140 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2141 MO.setReg(ExtB.getReg(0));
2142}
2143
2145 unsigned OpIdx) {
2146 MachineOperand &MO = MI.getOperand(OpIdx);
2147 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2148 MO.setReg(ExtB.getReg(0));
2149}
2150
2152 unsigned OpIdx, unsigned TruncOpcode) {
2153 MachineOperand &MO = MI.getOperand(OpIdx);
2154 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2155 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2156 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2157 MO.setReg(DstExt);
2158}
2159
2161 unsigned OpIdx, unsigned ExtOpcode) {
2162 MachineOperand &MO = MI.getOperand(OpIdx);
2163 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2164 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2165 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2166 MO.setReg(DstTrunc);
2167}
2168
2170 unsigned OpIdx) {
2171 MachineOperand &MO = MI.getOperand(OpIdx);
2172 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2173 Register Dst = MO.getReg();
2174 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2175 MO.setReg(DstExt);
2176 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2177}
2178
2180 unsigned OpIdx) {
2181 MachineOperand &MO = MI.getOperand(OpIdx);
2182 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2183}
2184
2186 MachineOperand &Op = MI.getOperand(OpIdx);
2187 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2188}
2189
2191 MachineOperand &MO = MI.getOperand(OpIdx);
2192 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2193 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2194 MIRBuilder.buildBitcast(MO, CastDst);
2195 MO.setReg(CastDst);
2196}
2197
// Promote the G_MERGE_VALUES source scalars (TypeIdx 1) to WideTy.
// Two strategies:
//  * WideSize >= DstSize: zext each source into WideTy, shift it to its bit
//    offset and OR everything together, truncating / int-to-ptr casting the
//    accumulated value at the end as needed.
//  * Otherwise: unmerge every source to the GCD of the source and wide
//    sizes, pad with undef, re-merge into WideTy-sized pieces, and merge
//    those into the destination (with a final trunc if the sizes do not
//    divide evenly).
2199 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2200 LLT WideTy) {
2201 if (TypeIdx != 1)
2202 return UnableToLegalize;
2203
2204 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2205 if (DstTy.isVector())
2206 return UnableToLegalize;
2207
2208 LLT SrcTy = MRI.getType(Src1Reg);
2209 const int DstSize = DstTy.getSizeInBits();
2210 const int SrcSize = SrcTy.getSizeInBits();
2211 const int WideSize = WideTy.getSizeInBits();
// Number of WideTy registers needed to cover the destination (rounded up).
2212 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2213
2214 unsigned NumOps = MI.getNumOperands();
2215 unsigned NumSrc = MI.getNumOperands() - 1;
2216 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2217
2218 if (WideSize >= DstSize) {
2219 // Directly pack the bits in the target type.
// Seed the accumulator with the first source; each subsequent source is
// zext'd, shifted to its offset, and OR'd in.
2220 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2221
2222 for (unsigned I = 2; I != NumOps; ++I) {
2223 const unsigned Offset = (I - 1) * PartSize;
2224
2225 Register SrcReg = MI.getOperand(I).getReg();
2226 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2227
2228 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2229
// The final OR can define DstReg directly when no trailing cast is needed.
2230 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2231 MRI.createGenericVirtualRegister(WideTy);
2232
2233 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2234 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2235 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2236 ResultReg = NextResult;
2237 }
2238
2239 if (WideSize > DstSize)
2240 MIRBuilder.buildTrunc(DstReg, ResultReg);
2241 else if (DstTy.isPointer())
2242 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2243
2244 MI.eraseFromParent();
2245 return Legalized;
2246 }
2247
2248 // Unmerge the original values to the GCD type, and recombine to the next
2249 // multiple greater than the original type.
2250 //
2251 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2252 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2253 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2254 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2255 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2256 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2257 // %12:_(s12) = G_MERGE_VALUES %10, %11
2258 //
2259 // Padding with undef if necessary:
2260 //
2261 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2262 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2263 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2264 // %7:_(s2) = G_IMPLICIT_DEF
2265 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2266 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2267 // %10:_(s12) = G_MERGE_VALUES %8, %9
2268
2269 const int GCD = std::gcd(SrcSize, WideSize);
2270 LLT GCDTy = LLT::scalar(GCD);
2271
2272 SmallVector<Register, 8> NewMergeRegs;
2273 SmallVector<Register, 8> Unmerges;
2274 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2275
2276 // Decompose the original operands if they don't evenly divide.
2277 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2278 Register SrcReg = MO.getReg();
2279 if (GCD == SrcSize) {
2280 Unmerges.push_back(SrcReg);
2281 } else {
2282 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2283 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2284 Unmerges.push_back(Unmerge.getReg(J));
2285 }
2286 }
2287
2288 // Pad with undef to the next size that is a multiple of the requested size.
// NOTE(review): this compares a count of GCD-sized pieces against
// NumMerge * WideSize, which is a bit count; any surplus undef padding looks
// harmless since only NumMerge * (WideSize / GCD) pieces are consumed by the
// Slicer below — confirm against upstream intent.
2289 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2290 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2291 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2292 Unmerges.push_back(UndefReg);
2293 }
2294
2295 const int PartsPerGCD = WideSize / GCD;
2296
2297 // Build merges of each piece.
2298 ArrayRef<Register> Slicer(Unmerges);
2299 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2300 auto Merge =
2301 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2302 NewMergeRegs.push_back(Merge.getReg(0));
2303 }
2304
2305 // A truncate may be necessary if the requested type doesn't evenly divide the
2306 // original result type.
2307 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2308 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2309 } else {
2310 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2311 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2312 }
2313
2314 MI.eraseFromParent();
2315 return Legalized;
2316 }
2317
// Promote the G_UNMERGE_VALUES destination scalars (TypeIdx 0) to WideTy.
// If WideTy covers the whole source, each result is extracted directly with
// shift + trunc. Otherwise the source is any-extended to the LCM of the
// source and wide types, unmerged to WideTy pieces, and those pieces are
// redistributed to the original destinations, creating dead defs where the
// widened source exceeds the original.
2319 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2320 LLT WideTy) {
2321 if (TypeIdx != 0)
2322 return UnableToLegalize;
2323
2324 int NumDst = MI.getNumOperands() - 1;
2325 Register SrcReg = MI.getOperand(NumDst).getReg();
2326 LLT SrcTy = MRI.getType(SrcReg);
2327 if (SrcTy.isVector())
2328 return UnableToLegalize;
2329
2330 Register Dst0Reg = MI.getOperand(0).getReg();
2331 LLT DstTy = MRI.getType(Dst0Reg);
2332 if (!DstTy.isScalar())
2333 return UnableToLegalize;
2334
2335 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
// A pointer source must be reinterpreted as an integer for the bit
// arithmetic below; only possible in integral address spaces.
2336 if (SrcTy.isPointer()) {
2337 const DataLayout &DL = MIRBuilder.getDataLayout();
2338 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2339 LLVM_DEBUG(
2340 dbgs() << "Not casting non-integral address space integer\n");
2341 return UnableToLegalize;
2342 }
2343
2344 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2345 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2346 }
2347
2348 // Widen SrcTy to WideTy. This does not affect the result, but since the
2349 // user requested this size, it is probably better handled than SrcTy and
2350 // should reduce the total number of legalization artifacts.
2351 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2352 SrcTy = WideTy;
2353 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2354 }
2355
2356 // There's no unmerge type to target. Directly extract the bits from the
2357 // source type.
2358 unsigned DstSize = DstTy.getSizeInBits();
2359
2360 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2361 for (int I = 1; I != NumDst; ++I) {
2362 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2363 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2364 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2365 }
2366
2367 MI.eraseFromParent();
2368 return Legalized;
2369 }
2370
2371 // Extend the source to a wider type.
2372 LLT LCMTy = getLCMType(SrcTy, WideTy);
2373
2374 Register WideSrc = SrcReg;
2375 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2376 // TODO: If this is an integral address space, cast to integer and anyext.
2377 if (SrcTy.isPointer()) {
2378 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2379 return UnableToLegalize;
2380 }
2381
2382 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2383 }
2384
2385 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2386
2387 // Create a sequence of unmerges and merges to the original results. Since we
2388 // may have widened the source, we will need to pad the results with dead defs
2389 // to cover the source register.
2390 // e.g. widen s48 to s64:
2391 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2392 //
2393 // =>
2394 // %4:_(s192) = G_ANYEXT %0:_(s96)
2395 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2396 // ; unpack to GCD type, with extra dead defs
2397 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2398 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2399 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2400 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2401 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2402 const LLT GCDTy = getGCDType(WideTy, DstTy);
2403 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2404 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2405
2406 // Directly unmerge to the destination without going through a GCD type
2407 // if possible
2408 if (PartsPerRemerge == 1) {
2409 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2410
2411 for (int I = 0; I != NumUnmerge; ++I) {
2412 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2413
2414 for (int J = 0; J != PartsPerUnmerge; ++J) {
2415 int Idx = I * PartsPerUnmerge + J;
2416 if (Idx < NumDst)
2417 MIB.addDef(MI.getOperand(Idx).getReg());
2418 else {
2419 // Create dead def for excess components.
2420 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2421 }
2422 }
2423
2424 MIB.addUse(Unmerge.getReg(I));
2425 }
2426 } else {
// General path: unpack every wide piece to the GCD type, then re-merge
// runs of PartsPerRemerge pieces into each original destination.
2427 SmallVector<Register, 16> Parts;
2428 for (int J = 0; J != NumUnmerge; ++J)
2429 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2430
2431 SmallVector<Register, 8> RemergeParts;
2432 for (int I = 0; I != NumDst; ++I) {
2433 for (int J = 0; J < PartsPerRemerge; ++J) {
2434 const int Idx = I * PartsPerRemerge + J;
2435 RemergeParts.emplace_back(Parts[Idx]);
2436 }
2437
2438 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2439 RemergeParts.clear();
2440 }
2441 }
2442
2443 MI.eraseFromParent();
2444 return Legalized;
2445 }
2446
// Promote G_EXTRACT to WideTy.
// TypeIdx 0 (the result): lowered to a logical shift of the (possibly
// any-extended) source by the bit offset followed by a trunc; pointer
// sources are first cast to integers.
// TypeIdx 1 (the source): any-extend the source; vector sources are only
// handled for whole-element extracts, with the offset rescaled to the
// widened element size.
2448 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2449 LLT WideTy) {
2450 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2451 unsigned Offset = MI.getOperand(2).getImm();
2452
2453 if (TypeIdx == 0) {
2454 if (SrcTy.isVector() || DstTy.isVector())
2455 return UnableToLegalize;
2456
2457 SrcOp Src(SrcReg);
2458 if (SrcTy.isPointer()) {
2459 // Extracts from pointers can be handled only if they are really just
2460 // simple integers.
2461 const DataLayout &DL = MIRBuilder.getDataLayout();
2462 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2463 return UnableToLegalize;
2464
2465 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2466 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2467 SrcTy = SrcAsIntTy;
2468 }
2469
2470 if (DstTy.isPointer())
2471 return UnableToLegalize;
2472
2473 if (Offset == 0) {
2474 // Avoid a shift in the degenerate case.
2475 MIRBuilder.buildTrunc(DstReg,
2476 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2477 MI.eraseFromParent();
2478 return Legalized;
2479 }
2480
2481 // Do a shift in the source type.
2482 LLT ShiftTy = SrcTy;
2483 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2484 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2485 ShiftTy = WideTy;
2486 }
2487
2488 auto LShr = MIRBuilder.buildLShr(
2489 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2490 MIRBuilder.buildTrunc(DstReg, LShr);
2491 MI.eraseFromParent();
2492 return Legalized;
2493 }
2494
2495 if (SrcTy.isScalar()) {
2496 Observer.changingInstr(MI);
2497 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2498 Observer.changedInstr(MI);
2499 return Legalized;
2500 }
2501
2502 if (!SrcTy.isVector())
2503 return UnableToLegalize;
2504
// Only whole-element extracts are supported on vector sources.
2505 if (DstTy != SrcTy.getElementType())
2506 return UnableToLegalize;
2507
2508 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2509 return UnableToLegalize;
2510
2511 Observer.changingInstr(MI);
2512 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2513
// Rescale the bit offset to account for the widened element size.
2514 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2515 Offset);
2516 widenScalarDst(MI, WideTy.getScalarType(), 0);
2517 Observer.changedInstr(MI);
2518 return Legalized;
2519 }
2520
2522LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2523 LLT WideTy) {
2524 if (TypeIdx != 0 || WideTy.isVector())
2525 return UnableToLegalize;
2526 Observer.changingInstr(MI);
2527 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2528 widenScalarDst(MI, WideTy);
2529 Observer.changedInstr(MI);
2530 return Legalized;
2531}
2532
2534LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2535 LLT WideTy) {
2536 unsigned Opcode;
2537 unsigned ExtOpcode;
2538 std::optional<Register> CarryIn;
2539 switch (MI.getOpcode()) {
2540 default:
2541 llvm_unreachable("Unexpected opcode!");
2542 case TargetOpcode::G_SADDO:
2543 Opcode = TargetOpcode::G_ADD;
2544 ExtOpcode = TargetOpcode::G_SEXT;
2545 break;
2546 case TargetOpcode::G_SSUBO:
2547 Opcode = TargetOpcode::G_SUB;
2548 ExtOpcode = TargetOpcode::G_SEXT;
2549 break;
2550 case TargetOpcode::G_UADDO:
2551 Opcode = TargetOpcode::G_ADD;
2552 ExtOpcode = TargetOpcode::G_ZEXT;
2553 break;
2554 case TargetOpcode::G_USUBO:
2555 Opcode = TargetOpcode::G_SUB;
2556 ExtOpcode = TargetOpcode::G_ZEXT;
2557 break;
2558 case TargetOpcode::G_SADDE:
2559 Opcode = TargetOpcode::G_UADDE;
2560 ExtOpcode = TargetOpcode::G_SEXT;
2561 CarryIn = MI.getOperand(4).getReg();
2562 break;
2563 case TargetOpcode::G_SSUBE:
2564 Opcode = TargetOpcode::G_USUBE;
2565 ExtOpcode = TargetOpcode::G_SEXT;
2566 CarryIn = MI.getOperand(4).getReg();
2567 break;
2568 case TargetOpcode::G_UADDE:
2569 Opcode = TargetOpcode::G_UADDE;
2570 ExtOpcode = TargetOpcode::G_ZEXT;
2571 CarryIn = MI.getOperand(4).getReg();
2572 break;
2573 case TargetOpcode::G_USUBE:
2574 Opcode = TargetOpcode::G_USUBE;
2575 ExtOpcode = TargetOpcode::G_ZEXT;
2576 CarryIn = MI.getOperand(4).getReg();
2577 break;
2578 }
2579
2580 if (TypeIdx == 1) {
2581 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2582
2583 Observer.changingInstr(MI);
2584 if (CarryIn)
2585 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2586 widenScalarDst(MI, WideTy, 1);
2587
2588 Observer.changedInstr(MI);
2589 return Legalized;
2590 }
2591
2592 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2593 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2594 // Do the arithmetic in the larger type.
2595 Register NewOp;
2596 if (CarryIn) {
2597 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2598 NewOp = MIRBuilder
2599 .buildInstr(Opcode, {WideTy, CarryOutTy},
2600 {LHSExt, RHSExt, *CarryIn})
2601 .getReg(0);
2602 } else {
2603 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2604 }
2605 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2606 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2607 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2608 // There is no overflow if the ExtOp is the same as NewOp.
2609 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2610 // Now trunc the NewOp to the original result.
2611 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2612 MI.eraseFromParent();
2613 return Legalized;
2614}
2615
2617LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2618 LLT WideTy) {
2619 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2620 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2621 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2622 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2623 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2624 // We can convert this to:
2625 // 1. Any extend iN to iM
2626 // 2. SHL by M-N
2627 // 3. [US][ADD|SUB|SHL]SAT
2628 // 4. L/ASHR by M-N
2629 //
2630 // It may be more efficient to lower this to a min and a max operation in
2631 // the higher precision arithmetic if the promoted operation isn't legal,
2632 // but this decision is up to the target's lowering request.
2633 Register DstReg = MI.getOperand(0).getReg();
2634
2635 unsigned NewBits = WideTy.getScalarSizeInBits();
2636 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2637
2638 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2639 // must not left shift the RHS to preserve the shift amount.
2640 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2641 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2642 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2643 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2644 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2645 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2646
2647 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2648 {ShiftL, ShiftR}, MI.getFlags());
2649
2650 // Use a shift that will preserve the number of sign bits when the trunc is
2651 // folded away.
2652 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2653 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2654
2655 MIRBuilder.buildTrunc(DstReg, Result);
2656 MI.eraseFromParent();
2657 return Legalized;
2658}
2659
// Promote G_UMULO / G_SMULO to WideTy.
// TypeIdx 1 widens the overflow flag in place. TypeIdx 0 performs the
// multiply in the wide type and derives overflow from the wide result.
2661 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2662 LLT WideTy) {
2663 if (TypeIdx == 1) {
2664 Observer.changingInstr(MI);
2665 widenScalarDst(MI, WideTy, 1);
2666 Observer.changedInstr(MI);
2667 return Legalized;
2668 }
2669
2670 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2671 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2672 LLT SrcTy = MRI.getType(LHS);
2673 LLT OverflowTy = MRI.getType(OriginalOverflow);
2674 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2675
2676 // To determine if the result overflowed in the larger type, we extend the
2677 // input to the larger type, do the multiply (checking if it overflows),
2678 // then also check the high bits of the result to see if overflow happened
2679 // there.
2680 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2681 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2682 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2683
2684 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2685 // so we don't need to check the overflow result of larger type Mulo.
2686 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2687
2688 unsigned MulOpc =
2689 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2690
// A plain G_MUL has no overflow def, so the build call differs in arity.
2691 MachineInstrBuilder Mulo;
2692 if (WideMulCanOverflow)
2693 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2694 {LeftOperand, RightOperand});
2695 else
2696 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2697
2698 auto Mul = Mulo->getOperand(0);
2699 MIRBuilder.buildTrunc(Result, Mul);
2700
2701 MachineInstrBuilder ExtResult;
2702 // Overflow occurred if it occurred in the larger type, or if the high part
2703 // of the result does not zero/sign-extend the low part. Check this second
2704 // possibility first.
2705 if (IsSigned) {
2706 // For signed, overflow occurred when the high part does not sign-extend
2707 // the low part.
2708 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2709 } else {
2710 // Unsigned overflow occurred when the high part does not zero-extend the
2711 // low part.
2712 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2713 }
2714
2715 if (WideMulCanOverflow) {
2716 auto Overflow =
2717 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2718 // Finally check if the multiplication in the larger type itself overflowed.
2719 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2720 } else {
2721 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2722 }
2723 MI.eraseFromParent();
2724 return Legalized;
2725 }
2726
2729 unsigned Opcode = MI.getOpcode();
2730 switch (Opcode) {
2731 default:
2732 return UnableToLegalize;
2733 case TargetOpcode::G_ATOMICRMW_XCHG:
2734 case TargetOpcode::G_ATOMICRMW_ADD:
2735 case TargetOpcode::G_ATOMICRMW_SUB:
2736 case TargetOpcode::G_ATOMICRMW_AND:
2737 case TargetOpcode::G_ATOMICRMW_OR:
2738 case TargetOpcode::G_ATOMICRMW_XOR:
2739 case TargetOpcode::G_ATOMICRMW_MIN:
2740 case TargetOpcode::G_ATOMICRMW_MAX:
2741 case TargetOpcode::G_ATOMICRMW_UMIN:
2742 case TargetOpcode::G_ATOMICRMW_UMAX:
2743 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2744 Observer.changingInstr(MI);
2745 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2746 widenScalarDst(MI, WideTy, 0);
2747 Observer.changedInstr(MI);
2748 return Legalized;
2749 case TargetOpcode::G_ATOMIC_CMPXCHG:
2750 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2751 Observer.changingInstr(MI);
2752 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2753 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2754 widenScalarDst(MI, WideTy, 0);
2755 Observer.changedInstr(MI);
2756 return Legalized;
2757 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2758 if (TypeIdx == 0) {
2759 Observer.changingInstr(MI);
2760 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2761 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2762 widenScalarDst(MI, WideTy, 0);
2763 Observer.changedInstr(MI);
2764 return Legalized;
2765 }
2766 assert(TypeIdx == 1 &&
2767 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2768 Observer.changingInstr(MI);
2769 widenScalarDst(MI, WideTy, 1);
2770 Observer.changedInstr(MI);
2771 return Legalized;
2772 case TargetOpcode::G_EXTRACT:
2773 return widenScalarExtract(MI, TypeIdx, WideTy);
2774 case TargetOpcode::G_INSERT:
2775 return widenScalarInsert(MI, TypeIdx, WideTy);
2776 case TargetOpcode::G_MERGE_VALUES:
2777 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2778 case TargetOpcode::G_UNMERGE_VALUES:
2779 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2780 case TargetOpcode::G_SADDO:
2781 case TargetOpcode::G_SSUBO:
2782 case TargetOpcode::G_UADDO:
2783 case TargetOpcode::G_USUBO:
2784 case TargetOpcode::G_SADDE:
2785 case TargetOpcode::G_SSUBE:
2786 case TargetOpcode::G_UADDE:
2787 case TargetOpcode::G_USUBE:
2788 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2789 case TargetOpcode::G_UMULO:
2790 case TargetOpcode::G_SMULO:
2791 return widenScalarMulo(MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_SADDSAT:
2793 case TargetOpcode::G_SSUBSAT:
2794 case TargetOpcode::G_SSHLSAT:
2795 case TargetOpcode::G_UADDSAT:
2796 case TargetOpcode::G_USUBSAT:
2797 case TargetOpcode::G_USHLSAT:
2798 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_CTTZ:
2800 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2801 case TargetOpcode::G_CTLZ:
2802 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2803 case TargetOpcode::G_CTLS:
2804 case TargetOpcode::G_CTPOP: {
2805 if (TypeIdx == 0) {
2806 Observer.changingInstr(MI);
2807 widenScalarDst(MI, WideTy, 0);
2808 Observer.changedInstr(MI);
2809 return Legalized;
2810 }
2811
2812 Register SrcReg = MI.getOperand(1).getReg();
2813
2814 // First extend the input.
2815 unsigned ExtOpc;
2816 switch (Opcode) {
2817 case TargetOpcode::G_CTTZ:
2818 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2819 case TargetOpcode::G_CTLZ_ZERO_UNDEF: // undef bits shifted out below
2820 ExtOpc = TargetOpcode::G_ANYEXT;
2821 break;
2822 case TargetOpcode::G_CTLS:
2823 ExtOpc = TargetOpcode::G_SEXT;
2824 break;
2825 default:
2826 ExtOpc = TargetOpcode::G_ZEXT;
2827 }
2828
2829 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2830 LLT CurTy = MRI.getType(SrcReg);
2831 unsigned NewOpc = Opcode;
2832 if (NewOpc == TargetOpcode::G_CTTZ) {
2833 // The count is the same in the larger type except if the original
2834 // value was zero. This can be handled by setting the bit just off
2835 // the top of the original type.
2836 auto TopBit =
2838 MIBSrc = MIRBuilder.buildOr(
2839 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2840 // Now we know the operand is non-zero, use the more relaxed opcode.
2841 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2842 }
2843
2844 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2845
2846 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2847 // An optimization where the result is the CTLZ after the left shift by
2848 // (difference in size between WideTy and CurTy), that is,
2849 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2850 // Result = ctlz MIBSrc
2851 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2852 MIRBuilder.buildConstant(WideTy, SizeDiff));
2853 }
2854
2855 // Perform the operation at the larger size.
2856 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2857 // This is already the correct result for CTPOP and CTTZs
2858 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2859 // The correct result is NewOp - (difference in size between WideTy and CurTy).
2860 // At this stage the subtraction is guaranteed to be non-negative and
2861 // non-wrapping, so it can be used in further KnownBits optimizations for CTLZ.
2862 MIBNewOp = MIRBuilder.buildSub(
2863 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff),
2864 Opcode == TargetOpcode::G_CTLZ
2865 ? std::optional<unsigned>(MachineInstr::NoUWrap)
2866 : std::nullopt);
2867 }
2868
2869 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2870 MI.eraseFromParent();
2871 return Legalized;
2872 }
2873 case TargetOpcode::G_BSWAP: {
2874 Observer.changingInstr(MI);
2875 Register DstReg = MI.getOperand(0).getReg();
2876
2877 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2878 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2879 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2880 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2881
2882 MI.getOperand(0).setReg(DstExt);
2883
2884 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2885
2886 LLT Ty = MRI.getType(DstReg);
2887 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2888 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2889 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2890
2891 MIRBuilder.buildTrunc(DstReg, ShrReg);
2892 Observer.changedInstr(MI);
2893 return Legalized;
2894 }
2895 case TargetOpcode::G_BITREVERSE: {
2896 Observer.changingInstr(MI);
2897
2898 Register DstReg = MI.getOperand(0).getReg();
2899 LLT Ty = MRI.getType(DstReg);
2900 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2901
2902 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2903 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2904 MI.getOperand(0).setReg(DstExt);
2905 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2906
2907 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2908 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2909 MIRBuilder.buildTrunc(DstReg, Shift);
2910 Observer.changedInstr(MI);
2911 return Legalized;
2912 }
2913 case TargetOpcode::G_FREEZE:
2914 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2915 Observer.changingInstr(MI);
2916 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2917 widenScalarDst(MI, WideTy);
2918 Observer.changedInstr(MI);
2919 return Legalized;
2920
2921 case TargetOpcode::G_ABS:
2922 Observer.changingInstr(MI);
2923 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2924 widenScalarDst(MI, WideTy);
2925 Observer.changedInstr(MI);
2926 return Legalized;
2927
2928 case TargetOpcode::G_ADD:
2929 case TargetOpcode::G_AND:
2930 case TargetOpcode::G_MUL:
2931 case TargetOpcode::G_OR:
2932 case TargetOpcode::G_XOR:
2933 case TargetOpcode::G_SUB:
2934 case TargetOpcode::G_SHUFFLE_VECTOR:
2935 // Perform operation at larger width (any extension is fine here, high bits
2936 // don't affect the result) and then truncate the result back to the
2937 // original type.
2938 Observer.changingInstr(MI);
2939 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2940 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2941 widenScalarDst(MI, WideTy);
2942 Observer.changedInstr(MI);
2943 return Legalized;
2944
2945 case TargetOpcode::G_SBFX:
2946 case TargetOpcode::G_UBFX:
2947 Observer.changingInstr(MI);
2948
2949 if (TypeIdx == 0) {
2950 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2951 widenScalarDst(MI, WideTy);
2952 } else {
2953 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2954 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2955 }
2956
2957 Observer.changedInstr(MI);
2958 return Legalized;
2959
2960 case TargetOpcode::G_SHL:
2961 Observer.changingInstr(MI);
2962
2963 if (TypeIdx == 0) {
2964 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2965 widenScalarDst(MI, WideTy);
2966 } else {
2967 assert(TypeIdx == 1);
2968 // The "number of bits to shift" operand must preserve its value as an
2969 // unsigned integer:
2970 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2971 }
2972
2973 Observer.changedInstr(MI);
2974 return Legalized;
2975
2976 case TargetOpcode::G_ROTR:
2977 case TargetOpcode::G_ROTL:
2978 if (TypeIdx != 1)
2979 return UnableToLegalize;
2980
2981 Observer.changingInstr(MI);
2982 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2983 Observer.changedInstr(MI);
2984 return Legalized;
2985
2986 case TargetOpcode::G_SDIV:
2987 case TargetOpcode::G_SREM:
2988 case TargetOpcode::G_SMIN:
2989 case TargetOpcode::G_SMAX:
2990 case TargetOpcode::G_ABDS:
2991 Observer.changingInstr(MI);
2992 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2993 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2994 widenScalarDst(MI, WideTy);
2995 Observer.changedInstr(MI);
2996 return Legalized;
2997
2998 case TargetOpcode::G_SDIVREM:
2999 Observer.changingInstr(MI);
3000 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3001 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
3002 widenScalarDst(MI, WideTy);
3003 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3004 widenScalarDst(MI, WideTy, 1);
3005 Observer.changedInstr(MI);
3006 return Legalized;
3007
3008 case TargetOpcode::G_ASHR:
3009 case TargetOpcode::G_LSHR:
3010 Observer.changingInstr(MI);
3011
3012 if (TypeIdx == 0) {
3013 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3014 : TargetOpcode::G_ZEXT;
3015
3016 widenScalarSrc(MI, WideTy, 1, CvtOp);
3017 widenScalarDst(MI, WideTy);
3018 } else {
3019 assert(TypeIdx == 1);
3020 // The "number of bits to shift" operand must preserve its value as an
3021 // unsigned integer:
3022 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3023 }
3024
3025 Observer.changedInstr(MI);
3026 return Legalized;
3027 case TargetOpcode::G_UDIV:
3028 case TargetOpcode::G_UREM:
3029 case TargetOpcode::G_ABDU:
3030 Observer.changingInstr(MI);
3031 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3032 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3033 widenScalarDst(MI, WideTy);
3034 Observer.changedInstr(MI);
3035 return Legalized;
3036 case TargetOpcode::G_UDIVREM:
3037 Observer.changingInstr(MI);
3038 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3039 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3040 widenScalarDst(MI, WideTy);
3041 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3042 widenScalarDst(MI, WideTy, 1);
3043 Observer.changedInstr(MI);
3044 return Legalized;
3045 case TargetOpcode::G_UMIN:
3046 case TargetOpcode::G_UMAX: {
3047 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3048
3049 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3050 unsigned ExtOpc =
3051 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3052 getApproximateEVTForLLT(WideTy, Ctx))
3053 ? TargetOpcode::G_SEXT
3054 : TargetOpcode::G_ZEXT;
3055
3056 Observer.changingInstr(MI);
3057 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3058 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3059 widenScalarDst(MI, WideTy);
3060 Observer.changedInstr(MI);
3061 return Legalized;
3062 }
3063
3064 case TargetOpcode::G_SELECT:
3065 Observer.changingInstr(MI);
3066 if (TypeIdx == 0) {
3067 // Perform operation at larger width (any extension is fine here, high
3068 // bits don't affect the result) and then truncate the result back to the
3069 // original type.
3070 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3071 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3072 widenScalarDst(MI, WideTy);
3073 } else {
3074 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3075 // Explicit extension is required here since high bits affect the result.
3076 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3077 }
3078 Observer.changedInstr(MI);
3079 return Legalized;
3080
3081 case TargetOpcode::G_FPEXT:
3082 if (TypeIdx != 1)
3083 return UnableToLegalize;
3084
3085 Observer.changingInstr(MI);
3086 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3087 Observer.changedInstr(MI);
3088 return Legalized;
3089 case TargetOpcode::G_FPTOSI:
3090 case TargetOpcode::G_FPTOUI:
3091 case TargetOpcode::G_INTRINSIC_LRINT:
3092 case TargetOpcode::G_INTRINSIC_LLRINT:
3093 case TargetOpcode::G_IS_FPCLASS:
3094 Observer.changingInstr(MI);
3095
3096 if (TypeIdx == 0)
3097 widenScalarDst(MI, WideTy);
3098 else
3099 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3100
3101 Observer.changedInstr(MI);
3102 return Legalized;
3103 case TargetOpcode::G_SITOFP:
3104 Observer.changingInstr(MI);
3105
3106 if (TypeIdx == 0)
3107 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3108 else
3109 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3110
3111 Observer.changedInstr(MI);
3112 return Legalized;
3113 case TargetOpcode::G_UITOFP:
3114 Observer.changingInstr(MI);
3115
3116 if (TypeIdx == 0)
3117 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3118 else
3119 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3120
3121 Observer.changedInstr(MI);
3122 return Legalized;
3123 case TargetOpcode::G_FPTOSI_SAT:
3124 case TargetOpcode::G_FPTOUI_SAT:
3125 Observer.changingInstr(MI);
3126
3127 if (TypeIdx == 0) {
3128 Register OldDst = MI.getOperand(0).getReg();
3129 LLT Ty = MRI.getType(OldDst);
3130 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3131 Register NewDst;
3132 MI.getOperand(0).setReg(ExtReg);
3133 uint64_t ShortBits = Ty.getScalarSizeInBits();
3134 uint64_t WideBits = WideTy.getScalarSizeInBits();
3135 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3136 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3137 // z = i16 fptosi_sat(a)
3138 // ->
3139 // x = i32 fptosi_sat(a)
3140 // y = smin(x, 32767)
3141 // z = smax(y, -32768)
3142 auto MaxVal = MIRBuilder.buildConstant(
3143 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3144 auto MinVal = MIRBuilder.buildConstant(
3145 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3146 Register MidReg =
3147 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3148 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3149 } else {
3150 // z = i16 fptoui_sat(a)
3151 // ->
3152 // x = i32 fptoui_sat(a)
3153 // y = smin(x, 65535)
3154 auto MaxVal = MIRBuilder.buildConstant(
3155 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3156 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3157 }
3158 MIRBuilder.buildTrunc(OldDst, NewDst);
3159 } else
3160 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3161
3162 Observer.changedInstr(MI);
3163 return Legalized;
3164 case TargetOpcode::G_LOAD:
3165 case TargetOpcode::G_SEXTLOAD:
3166 case TargetOpcode::G_ZEXTLOAD:
3167 Observer.changingInstr(MI);
3168 widenScalarDst(MI, WideTy);
3169 Observer.changedInstr(MI);
3170 return Legalized;
3171
3172 case TargetOpcode::G_STORE: {
3173 if (TypeIdx != 0)
3174 return UnableToLegalize;
3175
3176 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3177 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3178 if (!Ty.isScalar()) {
3179 // We need to widen the vector element type.
3180 Observer.changingInstr(MI);
3181 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3182 // We also need to adjust the MMO to turn this into a truncating store.
3183 MachineMemOperand &MMO = **MI.memoperands_begin();
3184 MachineFunction &MF = MIRBuilder.getMF();
3185 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3186 MI.setMemRefs(MF, {NewMMO});
3187 Observer.changedInstr(MI);
3188 return Legalized;
3189 }
3190
3191 Observer.changingInstr(MI);
3192
3193 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3194 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3195 widenScalarSrc(MI, WideTy, 0, ExtType);
3196
3197 Observer.changedInstr(MI);
3198 return Legalized;
3199 }
3200 case TargetOpcode::G_CONSTANT: {
3201 MachineOperand &SrcMO = MI.getOperand(1);
3202 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3203 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3204 MRI.getType(MI.getOperand(0).getReg()));
3205 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3206 ExtOpc == TargetOpcode::G_ANYEXT) &&
3207 "Illegal Extend");
3208 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3209 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3210 ? SrcVal.sext(WideTy.getSizeInBits())
3211 : SrcVal.zext(WideTy.getSizeInBits());
3212 Observer.changingInstr(MI);
3213 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3214
3215 widenScalarDst(MI, WideTy);
3216 Observer.changedInstr(MI);
3217 return Legalized;
3218 }
3219 case TargetOpcode::G_FCONSTANT: {
3220 // To avoid changing the bits of the constant due to extension to a larger
3221 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3222 MachineOperand &SrcMO = MI.getOperand(1);
3223 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3224 MIRBuilder.setInstrAndDebugLoc(MI);
3225 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3226 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3227 MI.eraseFromParent();
3228 return Legalized;
3229 }
3230 case TargetOpcode::G_IMPLICIT_DEF: {
3231 Observer.changingInstr(MI);
3232 widenScalarDst(MI, WideTy);
3233 Observer.changedInstr(MI);
3234 return Legalized;
3235 }
3236 case TargetOpcode::G_BRCOND:
3237 Observer.changingInstr(MI);
3238 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3239 Observer.changedInstr(MI);
3240 return Legalized;
3241
3242 case TargetOpcode::G_FCMP:
3243 Observer.changingInstr(MI);
3244 if (TypeIdx == 0)
3245 widenScalarDst(MI, WideTy);
3246 else {
3247 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3248 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3249 }
3250 Observer.changedInstr(MI);
3251 return Legalized;
3252
3253 case TargetOpcode::G_ICMP:
3254 Observer.changingInstr(MI);
3255 if (TypeIdx == 0)
3256 widenScalarDst(MI, WideTy);
3257 else {
3258 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3259 CmpInst::Predicate Pred =
3260 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3261
3262 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3263 unsigned ExtOpcode =
3264 (CmpInst::isSigned(Pred) ||
3265 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3266 getApproximateEVTForLLT(WideTy, Ctx)))
3267 ? TargetOpcode::G_SEXT
3268 : TargetOpcode::G_ZEXT;
3269 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3270 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3271 }
3272 Observer.changedInstr(MI);
3273 return Legalized;
3274
3275 case TargetOpcode::G_PTR_ADD:
3276 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3277 Observer.changingInstr(MI);
3278 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3279 Observer.changedInstr(MI);
3280 return Legalized;
3281
3282 case TargetOpcode::G_PHI: {
3283 assert(TypeIdx == 0 && "Expecting only Idx 0");
3284
3285 Observer.changingInstr(MI);
3286 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3287 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3288 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3289 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3290 }
3291
3292 MachineBasicBlock &MBB = *MI.getParent();
3293 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3294 widenScalarDst(MI, WideTy);
3295 Observer.changedInstr(MI);
3296 return Legalized;
3297 }
3298 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3299 if (TypeIdx == 0) {
3300 Register VecReg = MI.getOperand(1).getReg();
3301 LLT VecTy = MRI.getType(VecReg);
3302 Observer.changingInstr(MI);
3303
3305 MI,
3307 TargetOpcode::G_ANYEXT);
3308
3309 widenScalarDst(MI, WideTy, 0);
3310 Observer.changedInstr(MI);
3311 return Legalized;
3312 }
3313
3314 if (TypeIdx != 2)
3315 return UnableToLegalize;
3316 Observer.changingInstr(MI);
3317 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3318 Observer.changedInstr(MI);
3319 return Legalized;
3320 }
3321 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3322 if (TypeIdx == 0) {
3323 Observer.changingInstr(MI);
3324 const LLT WideEltTy = WideTy.getElementType();
3325
3326 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3327 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3328 widenScalarDst(MI, WideTy, 0);
3329 Observer.changedInstr(MI);
3330 return Legalized;
3331 }
3332
3333 if (TypeIdx == 1) {
3334 Observer.changingInstr(MI);
3335
3336 Register VecReg = MI.getOperand(1).getReg();
3337 LLT VecTy = MRI.getType(VecReg);
3338 LLT WideVecTy = VecTy.changeVectorElementType(WideTy);
3339
3340 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3341 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3342 widenScalarDst(MI, WideVecTy, 0);
3343 Observer.changedInstr(MI);
3344 return Legalized;
3345 }
3346
3347 if (TypeIdx == 2) {
3348 Observer.changingInstr(MI);
3349 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3350 Observer.changedInstr(MI);
3351 return Legalized;
3352 }
3353
3354 return UnableToLegalize;
3355 }
3356 case TargetOpcode::G_FADD:
3357 case TargetOpcode::G_FMUL:
3358 case TargetOpcode::G_FSUB:
3359 case TargetOpcode::G_FMA:
3360 case TargetOpcode::G_FMAD:
3361 case TargetOpcode::G_FNEG:
3362 case TargetOpcode::G_FABS:
3363 case TargetOpcode::G_FCANONICALIZE:
3364 case TargetOpcode::G_FMINNUM:
3365 case TargetOpcode::G_FMAXNUM:
3366 case TargetOpcode::G_FMINNUM_IEEE:
3367 case TargetOpcode::G_FMAXNUM_IEEE:
3368 case TargetOpcode::G_FMINIMUM:
3369 case TargetOpcode::G_FMAXIMUM:
3370 case TargetOpcode::G_FMINIMUMNUM:
3371 case TargetOpcode::G_FMAXIMUMNUM:
3372 case TargetOpcode::G_FDIV:
3373 case TargetOpcode::G_FREM:
3374 case TargetOpcode::G_FCEIL:
3375 case TargetOpcode::G_FFLOOR:
3376 case TargetOpcode::G_FCOS:
3377 case TargetOpcode::G_FSIN:
3378 case TargetOpcode::G_FTAN:
3379 case TargetOpcode::G_FACOS:
3380 case TargetOpcode::G_FASIN:
3381 case TargetOpcode::G_FATAN:
3382 case TargetOpcode::G_FATAN2:
3383 case TargetOpcode::G_FCOSH:
3384 case TargetOpcode::G_FSINH:
3385 case TargetOpcode::G_FTANH:
3386 case TargetOpcode::G_FLOG10:
3387 case TargetOpcode::G_FLOG:
3388 case TargetOpcode::G_FLOG2:
3389 case TargetOpcode::G_FRINT:
3390 case TargetOpcode::G_FNEARBYINT:
3391 case TargetOpcode::G_FSQRT:
3392 case TargetOpcode::G_FEXP:
3393 case TargetOpcode::G_FEXP2:
3394 case TargetOpcode::G_FEXP10:
3395 case TargetOpcode::G_FPOW:
3396 case TargetOpcode::G_INTRINSIC_TRUNC:
3397 case TargetOpcode::G_INTRINSIC_ROUND:
3398 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3399 assert(TypeIdx == 0);
3400 Observer.changingInstr(MI);
3401
3402 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3403 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3404
3405 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3406 Observer.changedInstr(MI);
3407 return Legalized;
3408 case TargetOpcode::G_FMODF: {
3409 Observer.changingInstr(MI);
3410 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3411
3412 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3413 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3414 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3415 Observer.changedInstr(MI);
3416 return Legalized;
3417 }
3418 case TargetOpcode::G_FPOWI:
3419 case TargetOpcode::G_FLDEXP:
3420 case TargetOpcode::G_STRICT_FLDEXP: {
3421 if (TypeIdx == 0) {
3422 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3423 return UnableToLegalize;
3424
3425 Observer.changingInstr(MI);
3426 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3427 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3428 Observer.changedInstr(MI);
3429 return Legalized;
3430 }
3431
3432 if (TypeIdx == 1) {
3433 // For some reason SelectionDAG tries to promote to a libcall without
3434 // actually changing the integer type for promotion.
3435 Observer.changingInstr(MI);
3436 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3437 Observer.changedInstr(MI);
3438 return Legalized;
3439 }
3440
3441 return UnableToLegalize;
3442 }
3443 case TargetOpcode::G_FFREXP: {
3444 Observer.changingInstr(MI);
3445
3446 if (TypeIdx == 0) {
3447 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3448 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3449 } else {
3450 widenScalarDst(MI, WideTy, 1);
3451 }
3452
3453 Observer.changedInstr(MI);
3454 return Legalized;
3455 }
3456 case TargetOpcode::G_LROUND:
3457 case TargetOpcode::G_LLROUND:
3458 Observer.changingInstr(MI);
3459
3460 if (TypeIdx == 0)
3461 widenScalarDst(MI, WideTy);
3462 else
3463 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3464
3465 Observer.changedInstr(MI);
3466 return Legalized;
3467
3468 case TargetOpcode::G_INTTOPTR:
3469 if (TypeIdx != 1)
3470 return UnableToLegalize;
3471
3472 Observer.changingInstr(MI);
3473 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3474 Observer.changedInstr(MI);
3475 return Legalized;
3476 case TargetOpcode::G_PTRTOINT:
3477 if (TypeIdx != 0)
3478 return UnableToLegalize;
3479
3480 Observer.changingInstr(MI);
3481 widenScalarDst(MI, WideTy, 0);
3482 Observer.changedInstr(MI);
3483 return Legalized;
3484 case TargetOpcode::G_BUILD_VECTOR: {
3485 Observer.changingInstr(MI);
3486
3487 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3488 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3489 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3490
3491 // Avoid changing the result vector type if the source element type was
3492 // requested.
3493 if (TypeIdx == 1) {
3494 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3495 } else {
3496 widenScalarDst(MI, WideTy, 0);
3497 }
3498
3499 Observer.changedInstr(MI);
3500 return Legalized;
3501 }
3502 case TargetOpcode::G_SEXT_INREG:
3503 if (TypeIdx != 0)
3504 return UnableToLegalize;
3505
3506 Observer.changingInstr(MI);
3507 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3508 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3509 Observer.changedInstr(MI);
3510 return Legalized;
3511 case TargetOpcode::G_PTRMASK: {
3512 if (TypeIdx != 1)
3513 return UnableToLegalize;
3514 Observer.changingInstr(MI);
3515 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3516 Observer.changedInstr(MI);
3517 return Legalized;
3518 }
3519 case TargetOpcode::G_VECREDUCE_ADD: {
3520 if (TypeIdx != 1)
3521 return UnableToLegalize;
3522 Observer.changingInstr(MI);
3523 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3524 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3525 Observer.changedInstr(MI);
3526 return Legalized;
3527 }
3528 case TargetOpcode::G_VECREDUCE_FADD:
3529 case TargetOpcode::G_VECREDUCE_FMUL:
3530 case TargetOpcode::G_VECREDUCE_FMIN:
3531 case TargetOpcode::G_VECREDUCE_FMAX:
3532 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3533 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3534 if (TypeIdx != 0)
3535 return UnableToLegalize;
3536 Observer.changingInstr(MI);
3537 Register VecReg = MI.getOperand(1).getReg();
3538 LLT VecTy = MRI.getType(VecReg);
3539 LLT WideVecTy = VecTy.changeElementType(WideTy);
3540 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3541 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3542 Observer.changedInstr(MI);
3543 return Legalized;
3544 }
3545 case TargetOpcode::G_VSCALE: {
3546 MachineOperand &SrcMO = MI.getOperand(1);
3547 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3548 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3549 // The CImm is always a signed value
3550 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3551 Observer.changingInstr(MI);
3552 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3553 widenScalarDst(MI, WideTy);
3554 Observer.changedInstr(MI);
3555 return Legalized;
3556 }
3557 case TargetOpcode::G_SPLAT_VECTOR: {
3558 if (TypeIdx != 1)
3559 return UnableToLegalize;
3560
3561 Observer.changingInstr(MI);
3562 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3563 Observer.changedInstr(MI);
3564 return Legalized;
3565 }
3566 case TargetOpcode::G_INSERT_SUBVECTOR: {
3567 if (TypeIdx != 0)
3568 return UnableToLegalize;
3569
3571 Register BigVec = IS.getBigVec();
3572 Register SubVec = IS.getSubVec();
3573
3574 LLT SubVecTy = MRI.getType(SubVec);
3575 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3576
3577 // Widen the G_INSERT_SUBVECTOR
3578 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3579 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3580 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3581 IS.getIndexImm());
3582
3583 // Truncate back down
3584 auto SplatZero = MIRBuilder.buildSplatVector(
3585 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3586 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3587 SplatZero);
3588
3589 MI.eraseFromParent();
3590
3591 return Legalized;
3592 }
3593 }
3594}
3595
3597 MachineIRBuilder &B, Register Src, LLT Ty) {
3598 auto Unmerge = B.buildUnmerge(Ty, Src);
3599 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3600 Pieces.push_back(Unmerge.getReg(I));
3601}
3602
// Materialize \p ConstVal through the function's constant pool: emit the
// constant into the pool, build a G_CONSTANT_POOL address of the pool entry,
// and load \p DstReg's type from that address with a G_LOAD.
//
// The pool entry's address is built in the default globals address space with
// the target pointer width for that space, and the load uses the constant's
// ABI alignment.
//
// NOTE(review): this listing is missing one line between the
// "MachineMemOperand *MMO =" initializer and its trailing argument list
// (presumably the MF.getMachineMemOperand(MachinePointerInfo::... call) —
// verify against the upstream source before relying on this excerpt.
3603 static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3604 MachineIRBuilder &MIRBuilder) {
3605 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3606 MachineFunction &MF = MIRBuilder.getMF();
3607 const DataLayout &DL = MIRBuilder.getDataLayout();
// Pool entries live with other global data, so address them in the default
// globals address space.
3608 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3609 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3610 LLT DstLLT = MRI.getType(DstReg);
3611
// Use the constant's ABI alignment for both the pool entry and the load.
3612 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3613
3614 auto Addr = MIRBuilder.buildConstantPool(
3615 AddrPtrTy,
3616 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3617
3618 MachineMemOperand *MMO =
3620 MachineMemOperand::MOLoad, DstLLT, Alignment);
3621
3622 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3623}
3624
3627 const MachineOperand &ConstOperand = MI.getOperand(1);
3628 const Constant *ConstantVal = ConstOperand.getCImm();
3629
3630 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3631 MI.eraseFromParent();
3632
3633 return Legalized;
3634}
3635
3638 const MachineOperand &ConstOperand = MI.getOperand(1);
3639 const Constant *ConstantVal = ConstOperand.getFPImm();
3640
3641 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3642 MI.eraseFromParent();
3643
3644 return Legalized;
3645}
3646
3649 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3650 if (SrcTy.isVector()) {
3651 LLT SrcEltTy = SrcTy.getElementType();
3653
3654 if (DstTy.isVector()) {
3655 int NumDstElt = DstTy.getNumElements();
3656 int NumSrcElt = SrcTy.getNumElements();
3657
3658 LLT DstEltTy = DstTy.getElementType();
3659 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3660 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3661
3662 // If there's an element size mismatch, insert intermediate casts to match
3663 // the result element type.
3664 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3665 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3666 //
3667 // =>
3668 //
3669 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3670 // %4:_(<2 x s8>) = G_BITCAST %2
3671 // %5:_(<2 x s8>) = G_BITCAST %3
3672 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3673 DstCastTy = DstTy.changeVectorElementCount(
3674 ElementCount::getFixed(NumDstElt / NumSrcElt));
3675 SrcPartTy = SrcEltTy;
3676 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3677 //
3678 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3679 //
3680 // =>
3681 //
3682 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3683 // %4:_(s16) = G_BITCAST %2
3684 // %5:_(s16) = G_BITCAST %3
3685 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3686 SrcPartTy = SrcTy.changeVectorElementCount(
3687 ElementCount::getFixed(NumSrcElt / NumDstElt));
3688 DstCastTy = DstEltTy;
3689 }
3690
3691 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3692 for (Register &SrcReg : SrcRegs)
3693 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3694 } else
3695 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3696
3697 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3698 MI.eraseFromParent();
3699 return Legalized;
3700 }
3701
3702 if (DstTy.isVector()) {
3704 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3705 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3706 MI.eraseFromParent();
3707 return Legalized;
3708 }
3709
3710 return UnableToLegalize;
3711}
3712
3713/// Figure out the bit offset into a register when coercing a vector index for
3714/// the wide element type. This is only for the case when promoting vector to
3715/// one with larger elements.
3716//
3717///
3718/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3719/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3721 Register Idx,
3722 unsigned NewEltSize,
3723 unsigned OldEltSize) {
3724 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3725 LLT IdxTy = B.getMRI()->getType(Idx);
3726
3727 // Now figure out the amount we need to shift to get the target bits.
3728 auto OffsetMask = B.buildConstant(
3729 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3730 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3731 return B.buildShl(IdxTy, OffsetIdx,
3732 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3733}
3734
3735/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3736/// is casting to a vector with a smaller element size, perform multiple element
3737/// extracts and merge the results. If this is coercing to a vector with larger
3738/// elements, index the bitcasted vector and extract the target element with bit
3739/// operations. This is intended to force the indexing in the native register
3740/// size for architectures that can dynamically index the register file.
3743 LLT CastTy) {
// Only the index-operand type index (TypeIdx 1) is handled here.
3744 if (TypeIdx != 1)
3745 return UnableToLegalize;
3746
3747 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3748
3749 LLT SrcEltTy = SrcVecTy.getElementType();
// A scalar CastTy is treated as a one-element vector for the ratio logic.
3750 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3751 unsigned OldNumElts = SrcVecTy.getNumElements();
3752
3753 LLT NewEltTy = CastTy.getScalarType();
3754 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3755
3756 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3757 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3758 if (NewNumElts > OldNumElts) {
3759 // Decreasing the vector element size
3760 //
3761 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3762 // =>
3763 // v4i32:castx = bitcast x:v2i64
3764 //
3765 // i64 = bitcast
3766 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3767 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3768 //
// Each old element must correspond to a whole number of new elements.
3769 if (NewNumElts % OldNumElts != 0)
3770 return UnableToLegalize;
3771
3772 // Type of the intermediate result vector.
3773 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3774 LLT MidTy =
3775 CastTy.changeElementCount(ElementCount::getFixed(NewEltsPerOldElt));
3776
3777 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3778
3779 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
// Index of the first narrow element covered by the requested old element.
3780 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3781
// Extract each narrow piece of the requested element.
3782 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3783 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3784 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3785 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3786 NewOps[I] = Elt.getReg(0);
3787 }
3788
// Reassemble the pieces and reinterpret them as the original element type.
3789 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3790 MIRBuilder.buildBitcast(Dst, NewVec);
3791 MI.eraseFromParent();
3792 return Legalized;
3793 }
3794
3795 if (NewNumElts < OldNumElts) {
3796 if (NewEltSize % OldEltSize != 0)
3797 return UnableToLegalize;
3798
3799 // This only depends on powers of 2 because we use bit tricks to figure out
3800 // the bit offset we need to shift to get the target element. A general
3801 // expansion could emit division/multiply.
3802 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3803 return UnableToLegalize;
3804
3805 // Increasing the vector element size.
3806 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3807 //
3808 // =>
3809 //
3810 // %cast = G_BITCAST %vec
3811 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3812 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3813 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3814 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3815 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3816 // %elt = G_TRUNC %elt_bits
3817
3818 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3819 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3820
3821 // Divide to get the index in the wider element type.
3822 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3823
// If CastTy degenerated to a scalar, the bitcast result itself is the wide
// "element"; otherwise dynamically index the recast vector.
3824 Register WideElt = CastVec;
3825 if (CastTy.isVector()) {
3826 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3827 ScaledIdx).getReg(0);
3828 }
3829
3830 // Compute the bit offset into the register of the target element.
3832 MIRBuilder, Idx, NewEltSize, OldEltSize);
3833
3834 // Shift the wide element to get the target element.
3835 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3836 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3837 MI.eraseFromParent();
3838 return Legalized;
3839 }
3840
// Same element count with a different element type is not handled here.
3841 return UnableToLegalize;
3842}
3843
3844/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
3845/// TargetReg, while preserving other bits in \p TargetReg.
3846///
3847/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3849 Register TargetReg, Register InsertReg,
3850 Register OffsetBits) {
3851 LLT TargetTy = B.getMRI()->getType(TargetReg);
3852 LLT InsertTy = B.getMRI()->getType(InsertReg);
// Widen the inserted value with zeros, then move it to its bit position.
3853 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3854 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3855
3856 // Produce a bitmask of the value to insert
3857 auto EltMask = B.buildConstant(
3858 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3859 InsertTy.getSizeInBits()));
3860 // Shift it into position
3861 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3862 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3863
3864 // Clear out the bits in the wide element
3865 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3866
3867 // The value to insert has all zeros already, so stick it into the masked
3868 // wide element.
3869 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3870}
3871
3872/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3873/// is increasing the element size, perform the indexing in the target element
3874/// type, and use bit operations to insert at the element position. This is
3875/// intended for architectures that can dynamically index the register file and
3876/// want to force indexing in the native register size.
3879 LLT CastTy) {
// Only the vector-type index (TypeIdx 0) is handled here.
3880 if (TypeIdx != 0)
3881 return UnableToLegalize;
3882
3883 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3884 MI.getFirst4RegLLTs();
3885 LLT VecTy = DstTy;
3886
3887 LLT VecEltTy = VecTy.getElementType();
// A scalar CastTy acts as a single wide element.
3888 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3889 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3890 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3891
3892 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3893 unsigned OldNumElts = VecTy.getNumElements();
3894
3895 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
// Only widening the element size is implemented; any other configuration
// falls through to UnableToLegalize at the bottom.
3896 if (NewNumElts < OldNumElts) {
// The wide element size must be an exact multiple of the old one.
3897 if (NewEltSize % OldEltSize != 0)
3898 return UnableToLegalize;
3899
3900 // This only depends on powers of 2 because we use bit tricks to figure out
3901 // the bit offset we need to shift to get the target element. A general
3902 // expansion could emit division/multiply.
3903 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3904 return UnableToLegalize;
3905
3906 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3907 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3908
3909 // Divide to get the index in the wider element type.
3910 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3911
// Read the wide element that contains the target narrow element.
3912 Register ExtractedElt = CastVec;
3913 if (CastTy.isVector()) {
3914 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3915 ScaledIdx).getReg(0);
3916 }
3917
3918 // Compute the bit offset into the register of the target element.
3920 MIRBuilder, Idx, NewEltSize, OldEltSize);
3921
// Splice the narrow value into the wide element, preserving its other bits.
3922 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3923 Val, OffsetBits);
3924 if (CastTy.isVector()) {
3925 InsertedElt = MIRBuilder.buildInsertVectorElement(
3926 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3927 }
3928
3929 MIRBuilder.buildBitcast(Dst, InsertedElt);
3930 MI.eraseFromParent();
3931 return Legalized;
3932 }
3933
3934 return UnableToLegalize;
3935}
3936
3937// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3938// those that have smaller than legal operands.
3939//
3940// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3941//
3942// ===>
3943//
3944// s32 = G_BITCAST <4 x s8>
3945// s32 = G_BITCAST <4 x s8>
3946// s32 = G_BITCAST <4 x s8>
3947// s32 = G_BITCAST <4 x s8>
3948// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3949// <16 x s8> = G_BITCAST <4 x s32>
3952 LLT CastTy) {
3953 // Convert it to CONCAT instruction
3954 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3955 if (!ConcatMI) {
3956 return UnableToLegalize;
3957 }
3958
3959 // Check if bitcast is Legal
3960 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
// Each source vector will be reinterpreted as one scalar of the same total
// bit width.
3961 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3962
3963 // Check if the build vector is Legal
3964 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3965 return UnableToLegalize;
3966 }
3967
3968 // Bitcast the sources
3969 SmallVector<Register> BitcastRegs;
3970 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3971 BitcastRegs.push_back(
3972 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3973 .getReg(0));
3974 }
3975
3976 // Build the scalar values into a vector
3977 Register BuildReg =
3978 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3979 MIRBuilder.buildBitcast(DstReg, BuildReg);
3980
3981 MI.eraseFromParent();
3982 return Legalized;
3983}
3984
3985// This bitcasts a shuffle vector to a different type currently of the same
3986// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3987// will be used instead.
3988//
3989// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3990// ===>
3991// <4 x s64> = G_PTRTOINT <4 x p0>
3992// <4 x s64> = G_PTRTOINT <4 x p0>
3993// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3994// <16 x p0> = G_INTTOPTR <16 x s64>
3997 LLT CastTy) {
3998 auto ShuffleMI = cast<GShuffleVector>(&MI);
3999 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
4000 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4001
4002 // We currently only handle vectors of the same size.
4003 if (TypeIdx != 0 ||
4004 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
4005 CastTy.getElementCount() != DstTy.getElementCount())
4006 return UnableToLegalize;
4007
// Sources keep their element count but take the cast scalar type.
4008 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
4009
// buildCast selects the appropriate conversion (bitcast, ptrtoint or
// inttoptr) for each type pair, so the same code handles ptr vectors.
4010 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4011 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4012 auto Shuf =
4013 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4014 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4015
4016 MI.eraseFromParent();
4017 return Legalized;
4018}
4019
4020/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4021///
4022/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4023///
4024/// ===>
4025///
4026/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4027/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
4028/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
4031 LLT CastTy) {
4032 auto ES = cast<GExtractSubvector>(&MI);
4033
4034 if (!CastTy.isVector())
4035 return UnableToLegalize;
4036
4037 if (TypeIdx != 0)
4038 return UnableToLegalize;
4039
4040 Register Dst = ES->getReg(0);
4041 Register Src = ES->getSrcVec();
4042 uint64_t Idx = ES->getIndexImm();
4043
4044 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4045
4046 LLT DstTy = MRI.getType(Dst);
4047 LLT SrcTy = MRI.getType(Src);
4048 ElementCount DstTyEC = DstTy.getElementCount();
4049 ElementCount SrcTyEC = SrcTy.getElementCount();
4050 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4051 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4052
// Nothing to do when the destination already has the requested type.
4053 if (DstTy == CastTy)
4054 return Legalized;
4055
4056 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4057 return UnableToLegalize;
4058
4059 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4060 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4061 if (CastEltSize < DstEltSize)
4062 return UnableToLegalize;
4063
// AdjustAmt old elements pack into each new element, so the index and all
// element counts must divide evenly by it.
4064 auto AdjustAmt = CastEltSize / DstEltSize;
4065 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4066 SrcTyMinElts % AdjustAmt != 0)
4067 return UnableToLegalize;
4068
4069 Idx /= AdjustAmt;
// NOTE(review): AdjustAmt is reused here both as the element-count divisor
// and as the new element size in bits; those only coincide when DstEltSize
// is 1 (the i1 case documented above) — confirm for wider element types.
4070 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4071 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4072 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4073 MIRBuilder.buildBitcast(Dst, PromotedES);
4074
4075 ES->eraseFromParent();
4076 return Legalized;
4077}
4078
4079/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4080///
4081/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4082/// <vscale x 8 x i1>,
4083/// N
4084///
4085/// ===>
4086///
4087/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4088/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4089/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4090/// <vscale x 1 x i8>, N / 8
4091/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4094 LLT CastTy) {
4095 auto ES = cast<GInsertSubvector>(&MI);
4096
4097 if (!CastTy.isVector())
4098 return UnableToLegalize;
4099
4100 if (TypeIdx != 0)
4101 return UnableToLegalize;
4102
4103 Register Dst = ES->getReg(0);
4104 Register BigVec = ES->getBigVec();
4105 Register SubVec = ES->getSubVec();
4106 uint64_t Idx = ES->getIndexImm();
4107
4108 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4109
4110 LLT DstTy = MRI.getType(Dst);
4111 LLT BigVecTy = MRI.getType(BigVec);
4112 LLT SubVecTy = MRI.getType(SubVec);
4113
// Nothing to do when the destination already has the requested type.
4114 if (DstTy == CastTy)
4115 return Legalized;
4116
4117 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4118 return UnableToLegalize;
4119
4120 ElementCount DstTyEC = DstTy.getElementCount();
4121 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4122 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4123 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4124 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4125 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4126
4127 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4128 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4129 if (CastEltSize < DstEltSize)
4130 return UnableToLegalize;
4131
// AdjustAmt old elements pack into each new element; the index and every
// involved element count must divide evenly by it.
4132 auto AdjustAmt = CastEltSize / DstEltSize;
4133 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4134 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4135 return UnableToLegalize;
4136
4137 Idx /= AdjustAmt;
// NOTE(review): as in bitcastExtractSubvector, AdjustAmt is reused as both
// the element-count divisor and the new element size in bits; these only
// coincide when DstEltSize is 1 — confirm for wider element types.
4138 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4139 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4140 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4141 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4142 auto PromotedIS =
4143 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4144 MIRBuilder.buildBitcast(Dst, PromotedIS);
4145
4146 ES->eraseFromParent();
4147 return Legalized;
4148}
4149
// Lower an extending or awkwardly-sized load: widen non-byte-sized memory
// accesses, or split a non-power-of-2 / unaligned load into two smaller
// loads recombined with shift+or.
4151 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4152 Register DstReg = LoadMI.getDstReg();
4153 Register PtrReg = LoadMI.getPointerReg();
4154 LLT DstTy = MRI.getType(DstReg);
4155 MachineMemOperand &MMO = LoadMI.getMMO();
4156 LLT MemTy = MMO.getMemoryType();
4157 MachineFunction &MF = MIRBuilder.getMF();
4158
4159 unsigned MemSizeInBits = MemTy.getSizeInBits();
4160 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4161
// Case 1: the memory type is not an integral number of bytes (e.g. s20).
4162 if (MemSizeInBits != MemStoreSizeInBits) {
4163 if (MemTy.isVector())
4164 return UnableToLegalize;
4165
4166 // Promote to a byte-sized load if not loading an integral number of
4167 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4168 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4169 MachineMemOperand *NewMMO =
4170 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4171
4172 Register LoadReg = DstReg;
4173 LLT LoadTy = DstTy;
4174
4175 // If this wasn't already an extending load, we need to widen the result
4176 // register to avoid creating a load with a narrower result than the source.
4177 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4178 LoadTy = WideMemTy;
4179 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4180 }
4181
4182 if (isa<GSExtLoad>(LoadMI)) {
4183 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4184 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4185 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4186 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4187 // The extra bits are guaranteed to be zero, since we stored them that
4188 // way. A zext load from Wide thus automatically gives zext from MemVT.
4189 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4190 } else {
4191 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4192 }
4193
4194 if (DstTy != LoadTy)
4195 MIRBuilder.buildTrunc(DstReg, LoadReg);
4196
4197 LoadMI.eraseFromParent();
4198 return Legalized;
4199 }
4200
4201 // Big endian lowering not implemented.
4202 if (MIRBuilder.getDataLayout().isBigEndian())
4203 return UnableToLegalize;
4204
4205 // This load needs splitting into power of 2 sized loads.
4206 //
4207 // Our strategy here is to generate anyextending loads for the smaller
4208 // types up to next power-2 result type, and then combine the two larger
4209 // result values together, before truncating back down to the non-pow-2
4210 // type.
4211 // E.g. v1 = i24 load =>
4212 // v2 = i32 zextload (2 byte)
4213 // v3 = i32 load (1 byte)
4214 // v4 = i32 shl v3, 16
4215 // v5 = i32 or v4, v2
4216 // v1 = i24 trunc v5
4217 // By doing this we generate the correct truncate which should get
4218 // combined away as an artifact with a matching extend.
4219
4220 uint64_t LargeSplitSize, SmallSplitSize;
4221
4222 if (!isPowerOf2_32(MemSizeInBits)) {
4223 // This load needs splitting into power of 2 sized loads.
4224 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4225 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4226 } else {
4227 // This is already a power of 2, but we still need to split this in half.
4228 //
4229 // Assume we're being asked to decompose an unaligned load.
4230 // TODO: If this requires multiple splits, handle them all at once.
4231 auto &Ctx = MF.getFunction().getContext();
// If the target says this access is actually fine, there is nothing for us
// to do.
4232 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4233 return UnableToLegalize;
4234
4235 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4236 }
4237
4238 if (MemTy.isVector()) {
4239 // TODO: Handle vector extloads
4240 if (MemTy != DstTy)
4241 return UnableToLegalize;
4242
4243 Align Alignment = LoadMI.getAlign();
4244 // Given an alignment larger than the size of the memory, we can increase
4245 // the size of the load without needing to scalarize it.
4246 if (Alignment.value() * 8 > MemSizeInBits &&
4248 LLT MoreTy = DstTy.changeVectorElementCount(
4250 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4251 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4252 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4253 NewLoad.getReg(0));
4254 LoadMI.eraseFromParent();
4255 return Legalized;
4256 }
4257
4258 // TODO: We can do better than scalarizing the vector and at least split it
4259 // in half.
4260 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4261 }
4262
// Scalar split: one zext load for the low (large) part and one load of the
// original extension kind for the high (small) part.
4263 MachineMemOperand *LargeMMO =
4264 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4265 MachineMemOperand *SmallMMO =
4266 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4267
4268 LLT PtrTy = MRI.getType(PtrReg);
4269 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4270 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4271 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4272 PtrReg, *LargeMMO);
4273
4274 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4275 LargeSplitSize / 8);
4276 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4277 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4278 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4279 SmallPtr, *SmallMMO);
4280
// Place the high part above the low part and merge them.
4281 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4282 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4283
4284 if (AnyExtTy == DstTy)
4285 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4286 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4287 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4288 MIRBuilder.buildTrunc(DstReg, {Or});
4289 } else {
4290 assert(DstTy.isPointer() && "expected pointer");
4291 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4292
4293 // FIXME: We currently consider this to be illegal for non-integral address
4294 // spaces, but we still need a way to reinterpret the bits.
4295 MIRBuilder.buildIntToPtr(DstReg, Or);
4296 }
4297
4298 LoadMI.eraseFromParent();
4299 return Legalized;
4300}
4301
// Lower an awkwardly-sized store: widen a non-byte-sized scalar store, hand
// vector cases to helpers, or split a non-power-of-2 / unaligned store into
// two truncating stores.
4303 // Lower a non-power of 2 store into multiple pow-2 stores.
4304 // E.g. split an i24 store into an i16 store + i8 store.
4305 // We do this by first extending the stored value to the next largest power
4306 // of 2 type, and then using truncating stores to store the components.
4307 // By doing this, likewise with G_LOAD, generate an extend that can be
4308 // artifact-combined away instead of leaving behind extracts.
4309 Register SrcReg = StoreMI.getValueReg();
4310 Register PtrReg = StoreMI.getPointerReg();
4311 LLT SrcTy = MRI.getType(SrcReg);
4312 MachineFunction &MF = MIRBuilder.getMF();
4313 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4314 LLT MemTy = MMO.getMemoryType();
4315
4316 unsigned StoreWidth = MemTy.getSizeInBits();
4317 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4318
// Case 1: scalar store of a non-byte-sized memory type.
4319 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4320 // Promote to a byte-sized store with upper bits zero if not
4321 // storing an integral number of bytes. For example, promote
4322 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4323 LLT WideTy = LLT::scalar(StoreSizeInBits);
4324
4325 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4326 // Avoid creating a store with a narrower source than result.
4327 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4328 SrcTy = WideTy;
4329 }
4330
// Zero the padding bits so the widened store has a defined bit pattern.
4331 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4332
4333 MachineMemOperand *NewMMO =
4334 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4335 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4336 StoreMI.eraseFromParent();
4337 return Legalized;
4338 }
4339
4340 if (MemTy.isVector()) {
// Truncating vector stores (e.g. <N x s1>) are packed into an integer.
4341 if (MemTy != SrcTy)
4342 return scalarizeVectorBooleanStore(StoreMI);
4343
4344 // TODO: We can do better than scalarizing the vector and at least split it
4345 // in half.
4346 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4347 }
4348
4349 unsigned MemSizeInBits = MemTy.getSizeInBits();
4350 uint64_t LargeSplitSize, SmallSplitSize;
4351
4352 if (!isPowerOf2_32(MemSizeInBits)) {
4353 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4354 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4355 } else {
// Power-of-2 size: assume we are asked to break up an unaligned store.
4356 auto &Ctx = MF.getFunction().getContext();
4357 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4358 return UnableToLegalize; // Don't know what we're being asked to do.
4359
4360 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4361 }
4362
4363 // Extend to the next pow-2. If this store was itself the result of lowering,
4364 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4365 // that's wider than the stored size.
4366 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4367 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4368
// Pointers have to be round-tripped through an integer to be shifted.
4369 if (SrcTy.isPointer()) {
4370 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4371 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4372 }
4373
4374 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4375
4376 // Obtain the smaller value by shifting away the larger value.
4377 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4378 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4379
4380 // Generate the PtrAdd and truncating stores.
4381 LLT PtrTy = MRI.getType(PtrReg);
4382 auto OffsetCst = MIRBuilder.buildConstant(
4383 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4384 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4385
4386 MachineMemOperand *LargeMMO =
4387 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4388 MachineMemOperand *SmallMMO =
4389 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4390 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4391 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4392 StoreMI.eraseFromParent();
4393 return Legalized;
4394}
4395
// Lower a truncating vector store whose memory element type is narrower
// than a byte (e.g. <N x s1>) by packing the element bit patterns into a
// single integer and emitting one scalar store of that integer.
4398 Register SrcReg = StoreMI.getValueReg();
4399 Register PtrReg = StoreMI.getPointerReg();
4400 LLT SrcTy = MRI.getType(SrcReg);
4401 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4402 LLT MemTy = MMO.getMemoryType();
4403 LLT MemScalarTy = MemTy.getElementType();
4404 MachineFunction &MF = MIRBuilder.getMF();
4405
4406 assert(SrcTy.isVector() && "Expect a vector store type");
4407
4408 if (!MemScalarTy.isByteSized()) {
4409 // We need to build an integer scalar of the vector bit pattern.
4410 // It's not legal for us to add padding when storing a vector.
4411 unsigned NumBits = MemTy.getSizeInBits();
4412 LLT IntTy = LLT::scalar(NumBits);
4413 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4414 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4415
4416 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
// Extract each element, truncate it to the memory element width,
// zero-extend into the accumulator type, and OR it into position.
4417 auto Elt = MIRBuilder.buildExtractVectorElement(
4418 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4419 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4420 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
// Big-endian targets place element 0 in the most significant position.
4421 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4422 ? (MemTy.getNumElements() - 1) - I
4423 : I;
4424 auto ShiftAmt = MIRBuilder.buildConstant(
4425 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4426 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4427 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4428 }
4429 auto PtrInfo = MMO.getPointerInfo();
4430 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4431 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4432 StoreMI.eraseFromParent();
4433 return Legalized;
4434 }
4435
4436 // TODO: implement simple scalarization.
4437 return UnableToLegalize;
4438}
4439
// Dispatch for the "bitcast" legalization action: rewrite \p MI to operate
// on \p CastTy, either by recasting operands/results in place (loads,
// stores, selects, bitwise ops) or by delegating to an opcode-specific
// helper.
4441LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4442 switch (MI.getOpcode()) {
4443 case TargetOpcode::G_LOAD: {
4444 if (TypeIdx != 0)
4445 return UnableToLegalize;
4446 MachineMemOperand &MMO = **MI.memoperands_begin();
4447
4448 // Not sure how to interpret a bitcast of an extending load.
4449 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4450 return UnableToLegalize;
4451
4452 Observer.changingInstr(MI);
4453 bitcastDst(MI, CastTy, 0);
4454 MMO.setType(CastTy);
4455 // The range metadata is no longer valid when reinterpreted as a different
4456 // type.
4457 MMO.clearRanges();
4458 Observer.changedInstr(MI);
4459 return Legalized;
4460 }
4461 case TargetOpcode::G_STORE: {
4462 if (TypeIdx != 0)
4463 return UnableToLegalize;
4464
4465 MachineMemOperand &MMO = **MI.memoperands_begin();
4466
4467 // Not sure how to interpret a bitcast of a truncating store.
4468 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4469 return UnableToLegalize;
4470
4471 Observer.changingInstr(MI);
4472 bitcastSrc(MI, CastTy, 0);
4473 MMO.setType(CastTy);
4474 Observer.changedInstr(MI);
4475 return Legalized;
4476 }
4477 case TargetOpcode::G_SELECT: {
4478 if (TypeIdx != 0)
4479 return UnableToLegalize;
4480
// Bitcasting vector selects is not implemented.
4481 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4482 LLVM_DEBUG(
4483 dbgs() << "bitcast action not implemented for vector select\n");
4484 return UnableToLegalize;
4485 }
4486
// Recast both select values and the result; the condition is untouched.
4487 Observer.changingInstr(MI);
4488 bitcastSrc(MI, CastTy, 2);
4489 bitcastSrc(MI, CastTy, 3);
4490 bitcastDst(MI, CastTy, 0);
4491 Observer.changedInstr(MI);
4492 return Legalized;
4493 }
4494 case TargetOpcode::G_AND:
4495 case TargetOpcode::G_OR:
4496 case TargetOpcode::G_XOR: {
// Bitwise ops do not care how the bits are interpreted, so recasting all
// operands and the result is sufficient.
4497 Observer.changingInstr(MI);
4498 bitcastSrc(MI, CastTy, 1);
4499 bitcastSrc(MI, CastTy, 2);
4500 bitcastDst(MI, CastTy, 0);
4501 Observer.changedInstr(MI);
4502 return Legalized;
4503 }
4504 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4505 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4506 case TargetOpcode::G_INSERT_VECTOR_ELT:
4507 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4508 case TargetOpcode::G_CONCAT_VECTORS:
4509 return bitcastConcatVector(MI, TypeIdx, CastTy);
4510 case TargetOpcode::G_SHUFFLE_VECTOR:
4511 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4512 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4513 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4514 case TargetOpcode::G_INSERT_SUBVECTOR:
4515 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4516 default:
4517 return UnableToLegalize;
4518 }
4519}
4520
4521// Legalize an instruction by changing the opcode in place.
4522void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
// Only the instruction descriptor is swapped; the operands are reused as-is
// for the new opcode.
4524 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4526}
4527
4529LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4530 using namespace TargetOpcode;
4531
4532 switch(MI.getOpcode()) {
4533 default:
4534 return UnableToLegalize;
4535 case TargetOpcode::G_FCONSTANT:
4536 return lowerFConstant(MI);
4537 case TargetOpcode::G_BITCAST:
4538 return lowerBitcast(MI);
4539 case TargetOpcode::G_SREM:
4540 case TargetOpcode::G_UREM: {
4541 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4542 auto Quot =
4543 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4544 {MI.getOperand(1), MI.getOperand(2)});
4545
4546 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4547 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4548 MI.eraseFromParent();
4549 return Legalized;
4550 }
4551 case TargetOpcode::G_SADDO:
4552 case TargetOpcode::G_SSUBO:
4553 return lowerSADDO_SSUBO(MI);
4554 case TargetOpcode::G_SADDE:
4555 return lowerSADDE(MI);
4556 case TargetOpcode::G_SSUBE:
4557 return lowerSSUBE(MI);
4558 case TargetOpcode::G_UMULH:
4559 case TargetOpcode::G_SMULH:
4560 return lowerSMULH_UMULH(MI);
4561 case TargetOpcode::G_SMULO:
4562 case TargetOpcode::G_UMULO: {
4563 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4564 // result.
4565 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4566 LLT Ty = MRI.getType(Res);
4567
4568 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4569 ? TargetOpcode::G_SMULH
4570 : TargetOpcode::G_UMULH;
4571
4572 Observer.changingInstr(MI);
4573 const auto &TII = MIRBuilder.getTII();
4574 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4575 MI.removeOperand(1);
4576 Observer.changedInstr(MI);
4577
4578 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4579 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4580
4581 // Move insert point forward so we can use the Res register if needed.
4582 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4583
4584 // For *signed* multiply, overflow is detected by checking:
4585 // (hi != (lo >> bitwidth-1))
4586 if (Opcode == TargetOpcode::G_SMULH) {
4587 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4588 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4589 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4590 } else {
4591 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4592 }
4593 return Legalized;
4594 }
4595 case TargetOpcode::G_FNEG: {
4596 auto [Res, SubByReg] = MI.getFirst2Regs();
4597 LLT Ty = MRI.getType(Res);
4598
4599 auto SignMask = MIRBuilder.buildConstant(
4600 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4601 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4602 MI.eraseFromParent();
4603 return Legalized;
4604 }
4605 case TargetOpcode::G_FSUB:
4606 case TargetOpcode::G_STRICT_FSUB: {
4607 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4608 LLT Ty = MRI.getType(Res);
4609
4610 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4611 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4612
4613 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4614 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4615 else
4616 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4617
4618 MI.eraseFromParent();
4619 return Legalized;
4620 }
4621 case TargetOpcode::G_FMAD:
4622 return lowerFMad(MI);
4623 case TargetOpcode::G_FFLOOR:
4624 return lowerFFloor(MI);
4625 case TargetOpcode::G_LROUND:
4626 case TargetOpcode::G_LLROUND: {
4627 Register DstReg = MI.getOperand(0).getReg();
4628 Register SrcReg = MI.getOperand(1).getReg();
4629 LLT SrcTy = MRI.getType(SrcReg);
4630 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4631 {SrcReg});
4632 MIRBuilder.buildFPTOSI(DstReg, Round);
4633 MI.eraseFromParent();
4634 return Legalized;
4635 }
4636 case TargetOpcode::G_INTRINSIC_ROUND:
4637 return lowerIntrinsicRound(MI);
4638 case TargetOpcode::G_FRINT: {
4639 // Since round even is the assumed rounding mode for unconstrained FP
4640 // operations, rint and roundeven are the same operation.
4641 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4642 return Legalized;
4643 }
4644 case TargetOpcode::G_INTRINSIC_LRINT:
4645 case TargetOpcode::G_INTRINSIC_LLRINT: {
4646 Register DstReg = MI.getOperand(0).getReg();
4647 Register SrcReg = MI.getOperand(1).getReg();
4648 LLT SrcTy = MRI.getType(SrcReg);
4649 auto Round =
4650 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4651 MIRBuilder.buildFPTOSI(DstReg, Round);
4652 MI.eraseFromParent();
4653 return Legalized;
4654 }
4655 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4656 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4657 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4658 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4659 **MI.memoperands_begin());
4660 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4661 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4662 MI.eraseFromParent();
4663 return Legalized;
4664 }
4665 case TargetOpcode::G_LOAD:
4666 case TargetOpcode::G_SEXTLOAD:
4667 case TargetOpcode::G_ZEXTLOAD:
4668 return lowerLoad(cast<GAnyLoad>(MI));
4669 case TargetOpcode::G_STORE:
4670 return lowerStore(cast<GStore>(MI));
4671 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4672 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4673 case TargetOpcode::G_CTLZ:
4674 case TargetOpcode::G_CTTZ:
4675 case TargetOpcode::G_CTPOP:
4676 case TargetOpcode::G_CTLS:
4677 return lowerBitCount(MI);
4678 case G_UADDO: {
4679 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4680
4681 Register NewRes = MRI.cloneVirtualRegister(Res);
4682
4683 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4684 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4685
4686 MIRBuilder.buildCopy(Res, NewRes);
4687
4688 MI.eraseFromParent();
4689 return Legalized;
4690 }
4691 case G_UADDE: {
4692 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4693 const LLT CondTy = MRI.getType(CarryOut);
4694 const LLT Ty = MRI.getType(Res);
4695
4696 Register NewRes = MRI.cloneVirtualRegister(Res);
4697
4698 // Initial add of the two operands.
4699 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4700
4701 // Initial check for carry.
4702 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4703
4704 // Add the sum and the carry.
4705 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4706 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4707
4708 // Second check for carry. We can only carry if the initial sum is all 1s
4709 // and the carry is set, resulting in a new sum of 0.
4710 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4711 auto ResEqZero =
4712 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4713 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4714 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4715
4716 MIRBuilder.buildCopy(Res, NewRes);
4717
4718 MI.eraseFromParent();
4719 return Legalized;
4720 }
4721 case G_USUBO: {
4722 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4723
4724 MIRBuilder.buildSub(Res, LHS, RHS);
4725 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4726
4727 MI.eraseFromParent();
4728 return Legalized;
4729 }
4730 case G_USUBE: {
4731 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4732 const LLT CondTy = MRI.getType(BorrowOut);
4733 const LLT Ty = MRI.getType(Res);
4734
4735 // Initial subtract of the two operands.
4736 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4737
4738 // Initial check for borrow.
4739 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4740
4741 // Subtract the borrow from the first subtract.
4742 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4743 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4744
4745 // Second check for borrow. We can only borrow if the initial difference is
4746 // 0 and the borrow is set, resulting in a new difference of all 1s.
4747 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4748 auto TmpResEqZero =
4749 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4750 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4751 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4752
4753 MI.eraseFromParent();
4754 return Legalized;
4755 }
4756 case G_UITOFP:
4757 return lowerUITOFP(MI);
4758 case G_SITOFP:
4759 return lowerSITOFP(MI);
4760 case G_FPTOUI:
4761 return lowerFPTOUI(MI);
4762 case G_FPTOSI:
4763 return lowerFPTOSI(MI);
4764 case G_FPTOUI_SAT:
4765 case G_FPTOSI_SAT:
4766 return lowerFPTOINT_SAT(MI);
4767 case G_FPTRUNC:
4768 return lowerFPTRUNC(MI);
4769 case G_FPOWI:
4770 return lowerFPOWI(MI);
4771 case G_FMODF:
4772 return lowerFMODF(MI);
4773 case G_SMIN:
4774 case G_SMAX:
4775 case G_UMIN:
4776 case G_UMAX:
4777 return lowerMinMax(MI);
4778 case G_SCMP:
4779 case G_UCMP:
4780 return lowerThreewayCompare(MI);
4781 case G_FCOPYSIGN:
4782 return lowerFCopySign(MI);
4783 case G_FMINNUM:
4784 case G_FMAXNUM:
4785 case G_FMINIMUMNUM:
4786 case G_FMAXIMUMNUM:
4787 return lowerFMinNumMaxNum(MI);
4788 case G_FMINIMUM:
4789 case G_FMAXIMUM:
4790 return lowerFMinimumMaximum(MI);
4791 case G_MERGE_VALUES:
4792 return lowerMergeValues(MI);
4793 case G_UNMERGE_VALUES:
4794 return lowerUnmergeValues(MI);
4795 case TargetOpcode::G_SEXT_INREG: {
4796 assert(MI.getOperand(2).isImm() && "Expected immediate");
4797 int64_t SizeInBits = MI.getOperand(2).getImm();
4798
4799 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4800 LLT DstTy = MRI.getType(DstReg);
4801 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4802
4803 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4804 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4805 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4806 MI.eraseFromParent();
4807 return Legalized;
4808 }
4809 case G_EXTRACT_VECTOR_ELT:
4810 case G_INSERT_VECTOR_ELT:
4812 case G_SHUFFLE_VECTOR:
4813 return lowerShuffleVector(MI);
4814 case G_VECTOR_COMPRESS:
4815 return lowerVECTOR_COMPRESS(MI);
4816 case G_DYN_STACKALLOC:
4817 return lowerDynStackAlloc(MI);
4818 case G_STACKSAVE:
4819 return lowerStackSave(MI);
4820 case G_STACKRESTORE:
4821 return lowerStackRestore(MI);
4822 case G_EXTRACT:
4823 return lowerExtract(MI);
4824 case G_INSERT:
4825 return lowerInsert(MI);
4826 case G_BSWAP:
4827 return lowerBswap(MI);
4828 case G_BITREVERSE:
4829 return lowerBitreverse(MI);
4830 case G_READ_REGISTER:
4831 case G_WRITE_REGISTER:
4832 return lowerReadWriteRegister(MI);
4833 case G_UADDSAT:
4834 case G_USUBSAT: {
4835 // Try to make a reasonable guess about which lowering strategy to use. The
4836 // target can override this with custom lowering and calling the
4837 // implementation functions.
4838 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4839 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4840 return lowerAddSubSatToMinMax(MI);
4842 }
4843 case G_SADDSAT:
4844 case G_SSUBSAT: {
4845 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4846
4847 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4848 // since it's a shorter expansion. However, we would need to figure out the
4849 // preferred boolean type for the carry out for the query.
4850 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4851 return lowerAddSubSatToMinMax(MI);
4853 }
4854 case G_SSHLSAT:
4855 case G_USHLSAT:
4856 return lowerShlSat(MI);
4857 case G_ABS:
4858 return lowerAbsToAddXor(MI);
4859 case G_ABDS:
4860 case G_ABDU: {
4861 bool IsSigned = MI.getOpcode() == G_ABDS;
4862 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4863 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4864 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4865 return lowerAbsDiffToMinMax(MI);
4866 }
4867 return lowerAbsDiffToSelect(MI);
4868 }
4869 case G_FABS:
4870 return lowerFAbs(MI);
4871 case G_SELECT:
4872 return lowerSelect(MI);
4873 case G_IS_FPCLASS:
4874 return lowerISFPCLASS(MI);
4875 case G_SDIVREM:
4876 case G_UDIVREM:
4877 return lowerDIVREM(MI);
4878 case G_FSHL:
4879 case G_FSHR:
4880 return lowerFunnelShift(MI);
4881 case G_ROTL:
4882 case G_ROTR:
4883 return lowerRotate(MI);
4884 case G_MEMSET:
4885 case G_MEMCPY:
4886 case G_MEMMOVE:
4887 return lowerMemCpyFamily(MI);
4888 case G_MEMCPY_INLINE:
4889 return lowerMemcpyInline(MI);
4890 case G_ZEXT:
4891 case G_SEXT:
4892 case G_ANYEXT:
4893 return lowerEXT(MI);
4894 case G_TRUNC:
4895 return lowerTRUNC(MI);
4897 return lowerVectorReduction(MI);
4898 case G_VAARG:
4899 return lowerVAArg(MI);
4900 case G_ATOMICRMW_SUB: {
4901 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4902 const LLT ValTy = MRI.getType(Val);
4903 MachineMemOperand *MMO = *MI.memoperands_begin();
4904
4905 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4906 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4907 MI.eraseFromParent();
4908 return Legalized;
4909 }
4910 }
4911}
4912
4914 Align MinAlign) const {
4915 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4916 // datalayout for the preferred alignment. Also there should be a target hook
4917 // for this to allow targets to reduce the alignment and ignore the
4918 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4919 // the type.
     // Align the temporary to the next power of two of the type's byte size,
     // but never below the caller-supplied MinAlign floor.
     // NOTE(review): the opening line of this signature (original line 4913)
     // was dropped by the extraction.
4920 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4921}
4922
4925 MachinePointerInfo &PtrInfo) {
     // Create a fixed stack object of `Bytes` bytes with the requested
     // alignment, fill PtrInfo with its fixed-stack pointer info, and return a
     // G_FRAME_INDEX addressing it in the target's alloca address space.
     // NOTE(review): the leading lines of this signature (original lines
     // 4923-4924) were dropped by the extraction.
4926 MachineFunction &MF = MIRBuilder.getMF();
4927 const DataLayout &DL = MIRBuilder.getDataLayout();
4928 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); // false: not a spill slot.
4929
4930 unsigned AddrSpace = DL.getAllocaAddrSpace();
4931 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4932
4933 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4934 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4935}
4936
4938 const SrcOp &Val) {
     // Round-trip Val through a stack temporary: store Val, then load the slot
     // back as Res. The temporary is aligned for both the source and result
     // types so the store and the reload are each naturally aligned.
     // NOTE(review): the signature line (original 4937) and the second
     // std::max argument (original line 4942, presumably the result type's
     // stack alignment) were dropped by the extraction.
4939 LLT SrcTy = Val.getLLTTy(MRI);
4940 Align StackTypeAlign =
4941 std::max(getStackTemporaryAlignment(SrcTy),
4943 MachinePointerInfo PtrInfo;
4944 auto StackTemp =
4945 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4946
4947 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4948 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4949}
4950
4952 LLT VecTy) {
     // Clamp a (possibly variable) vector index so it cannot address past the
     // last element. NOTE(review): the signature line (original 4951) was
     // dropped by the extraction.
4953 LLT IdxTy = B.getMRI()->getType(IdxReg);
4954 unsigned NElts = VecTy.getNumElements();
4955
4956 int64_t IdxVal;
4957 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4958 if (IdxVal < VecTy.getNumElements())
4959 return IdxReg; // Known-constant, in-bounds index: nothing to emit.
4960 // If a constant index would be out of bounds, clamp it as well.
4961 }
4962
     // Power-of-two element count: masking the low log2(NElts) bits is cheaper
     // than a umin and gives the same in-range guarantee.
4963 if (isPowerOf2_32(NElts)) {
4964 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4965 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4966 }
4967
     // General case: clamp to the last valid element index.
4968 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4969 .getReg(0);
4970}
4971
4973 Register Index) {
     // Compute the address of VecPtr[Index] for a vector held in memory:
     // clamp the index in range, resize it to the address space's index width,
     // scale by the element byte size, and G_PTR_ADD onto the base pointer.
     // NOTE(review): the signature line (original 4972) was dropped by the
     // extraction.
4974 LLT EltTy = VecTy.getElementType();
4975
4976 // Calculate the element offset and add it to the pointer.
4977 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4978 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4979 "Converting bits to bytes lost precision");
4980
4981 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4982
4983 // Convert index to the correct size for the address space.
4984 const DataLayout &DL = MIRBuilder.getDataLayout();
4985 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4986 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4987 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4988 if (IdxTy != MRI.getType(Index))
4989 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4990
     // Byte offset = Index * sizeof(element).
4991 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4992 MIRBuilder.buildConstant(IdxTy, EltSize));
4993
4994 LLT PtrTy = MRI.getType(VecPtr);
4995 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4996}
4997
4998#ifndef NDEBUG
4999/// Check that all vector operands have same number of elements. Other operands
5000/// should be listed in NonVecOp.
     /// Used only from asserts. Returns false for any instruction with memory
     /// operands, or whose def 0 is not a vector.
     /// NOTE(review): the function signature (original lines 5001-5002) was
     /// dropped by the extraction.
5003 std::initializer_list<unsigned> NonVecOpIndices) {
     // Instructions that carry memoperands are rejected outright.
5004 if (MI.getNumMemOperands() != 0)
5005 return false;
5006
5007 LLT VecTy = MRI.getType(MI.getReg(0));
5008 if (!VecTy.isVector())
5009 return false;
5010 unsigned NumElts = VecTy.getNumElements();
5011
5012 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5013 MachineOperand &Op = MI.getOperand(OpIdx);
     // Non-register operands (e.g. immediates) are only tolerated when the
     // caller explicitly listed their index.
5014 if (!Op.isReg()) {
5015 if (!is_contained(NonVecOpIndices, OpIdx))
5016 return false;
5017 continue;
5018 }
5019
     // Scalar register operands likewise must be explicitly listed.
5020 LLT Ty = MRI.getType(Op.getReg());
5021 if (!Ty.isVector()) {
5022 if (!is_contained(NonVecOpIndices, OpIdx))
5023 return false;
5024 continue;
5025 }
5026
     // Every vector operand must match def 0's element count.
5027 if (Ty.getNumElements() != NumElts)
5028 return false;
5029 }
5030
5031 return true;
5032}
5033#endif
5034
5035/// Fill \p DstOps with DstOps that have same number of elements combined as
5036/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
5037/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
5038/// of \p NumElts last DstOp (leftover) has fewer than \p NumElts elements.
5039 static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5040 unsigned NumElts) {
5041 LLT LeftoverTy;
5042 assert(Ty.isVector() && "Expected vector type");
5043 LLT NarrowTy = Ty.changeElementCount(ElementCount::getFixed(NumElts));
5044 int NumParts, NumLeftover;
5045 std::tie(NumParts, NumLeftover) =
5046 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5047
     // NumParts full NarrowTy pieces, plus at most one leftover piece.
5048 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5049 for (int i = 0; i < NumParts; ++i) {
5050 DstOps.push_back(NarrowTy);
5051 }
5052
5053 if (LeftoverTy.isValid()) {
5054 assert(NumLeftover == 1 && "expected exactly one leftover");
5055 DstOps.push_back(LeftoverTy);
5056 }
5057}
5058
5059/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
5060/// made from \p Op depending on operand type.
     /// Supports register, immediate and predicate operands; anything else is
     /// unreachable. NOTE(review): the signature line (original 5061) was
     /// dropped by the extraction.
5062 MachineOperand &Op) {
5063 for (unsigned i = 0; i < N; ++i) {
5064 if (Op.isReg())
5065 Ops.push_back(Op.getReg());
5066 else if (Op.isImm())
5067 Ops.push_back(Op.getImm());
5068 else if (Op.isPredicate())
5069 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5070 else
5071 llvm_unreachable("Unsupported type");
5072 }
5073}
5074
5075// Handle splitting vector operations which need to have the same number of
5076// elements in each type index, but each type index may have a different element
5077// type.
5078//
5079// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5080// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5081// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5082//
5083// Also handles some irregular breakdown cases, e.g.
5084// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5085// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5086// s64 = G_SHL s64, s32
   // NOTE(review): the function signature (original lines 5087-5088) was
   // dropped by the extraction.
5089 GenericMachineInstr &MI, unsigned NumElts,
5090 std::initializer_list<unsigned> NonVecOpIndices) {
5091 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5092 "Non-compatible opcode or not specified non-vector operands");
5093 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5094
5095 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5096 unsigned NumDefs = MI.getNumDefs();
5097
5098 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5099 // Build instructions with DstOps to use instruction found by CSE directly.
5100 // CSE copies found instruction into given vreg when building with vreg dest.
5101 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5102 // Output registers will be taken from created instructions.
5103 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5104 for (unsigned i = 0; i < NumDefs; ++i) {
5105 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5106 }
5107
5108 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5109 // Operands listed in NonVecOpIndices will be used as is without splitting;
5110 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5111 // scalar condition (op 1), immediate in sext_inreg (op 2).
5112 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5113 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5114 ++UseIdx, ++UseNo) {
5115 if (is_contained(NonVecOpIndices, UseIdx)) {
     // Non-vector operand: replicate it once per output piece.
5116 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5117 MI.getOperand(UseIdx));
5118 } else {
5119 SmallVector<Register, 8> SplitPieces;
5120 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5121 MRI);
5122 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5123 }
5124 }
5125
5126 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5127
5128 // Take i-th piece of each input operand split and build sub-vector/scalar
5129 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
     // NOTE(review): the local declarations of `Defs` and `Uses` (original
     // lines 5131 and 5135, presumably SmallVectors of DstOp/SrcOp) were
     // dropped by the extraction.
5130 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5132 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5133 Defs.push_back(OutputOpsPieces[DstNo][i]);
5134
5136 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5137 Uses.push_back(InputOpsPieces[InputNo][i]);
5138
5139 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5140 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5141 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5142 }
5143
5144 // Merge small outputs into MI's output for each def operand.
5145 if (NumLeftovers) {
5146 for (unsigned i = 0; i < NumDefs; ++i)
5147 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5148 } else {
5149 for (unsigned i = 0; i < NumDefs; ++i)
5150 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5151 }
5152
5153 MI.eraseFromParent();
5154 return Legalized;
5155}
5156
5159 unsigned NumElts) {
     // Split a vector G_PHI into several narrower G_PHIs of NumElts elements
     // (plus one leftover). The splits of each incoming value are emitted in
     // the predecessor block that defines it. NOTE(review): the signature
     // lines (original 5157-5158) were dropped by the extraction.
5160 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5161
5162 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5163 unsigned NumDefs = MI.getNumDefs();
5164
5165 SmallVector<DstOp, 8> OutputOpsPieces;
5166 SmallVector<Register, 8> OutputRegs;
5167 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5168
5169 // Instructions that perform register split will be inserted in basic block
5170 // where register is defined (basic block is in the next operand).
     // PHI operands come in (value, predecessor-block) pairs, hence the
     // NumInputs / 2 and the UseIdx += 2 stride below.
5171 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5172 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5173 UseIdx += 2, ++UseNo) {
5174 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5175 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5176 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5177 MIRBuilder, MRI);
5178 }
5179
5180 // Build PHIs with fewer elements.
5181 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5182 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5183 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5184 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5185 Phi.addDef(
5186 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5187 OutputRegs.push_back(Phi.getReg(0));
5188
5189 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5190 Phi.addUse(InputOpsPieces[j][i]);
5191 Phi.add(MI.getOperand(1 + j * 2 + 1)); // Reuse the predecessor MBB operand.
5192 }
5193 }
5194
5195 // Set the insert point after the existing PHIs
5196 MachineBasicBlock &MBB = *MI.getParent();
5197 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5198
5199 // Merge small outputs into MI's def.
5200 if (NumLeftovers) {
5201 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5202 } else {
5203 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5204 }
5205
5206 MI.eraseFromParent();
5207 return Legalized;
5208}
5209
5212 unsigned TypeIdx,
5213 LLT NarrowTy) {
     // Narrow a G_UNMERGE_VALUES on its source type (TypeIdx 1): first unmerge
     // SrcTy into NarrowTy pieces, then unmerge each piece into the original
     // DstTy results. NOTE(review): the signature lines (original 5210-5211)
     // were dropped by the extraction.
5214 const int NumDst = MI.getNumOperands() - 1;
5215 const Register SrcReg = MI.getOperand(NumDst).getReg();
5216 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5217 LLT SrcTy = MRI.getType(SrcReg);
5218
5219 if (TypeIdx != 1 || NarrowTy == DstTy)
5220 return UnableToLegalize;
5221
5222 // Requires compatible types. Otherwise SrcReg should have been defined by
5223 // merge-like instruction that would get artifact combined. Most likely
5224 // instruction that defines SrcReg has to perform more/fewer elements
5225 // legalization compatible with NarrowTy.
5226 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5227 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5228
5229 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5230 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5231 return UnableToLegalize;
5232
5233 // This is most likely DstTy (smaller than register size) packed in SrcTy
5234 // (larger than register size) and since unmerge was not combined it will be
5235 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5236 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5237
5238 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5239 //
5240 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5241 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5242 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5243 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5244 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5245 const int PartsPerUnmerge = NumDst / NumUnmerge;
5246
     // Second-level unmerges reuse the original instruction's def registers.
5247 for (int I = 0; I != NumUnmerge; ++I) {
5248 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5249
5250 for (int J = 0; J != PartsPerUnmerge; ++J)
5251 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5252 MIB.addUse(Unmerge.getReg(I));
5253 }
5254
5255 MI.eraseFromParent();
5256 return Legalized;
5257}
5258
5261 LLT NarrowTy) {
     // Narrow a merge-like instruction (G_MERGE_VALUES / G_BUILD_VECTOR /
     // G_CONCAT_VECTORS) by first merging sources into NarrowTy pieces, then
     // merging the pieces into DstReg. NOTE(review): the signature lines
     // (original 5259-5260) were dropped by the extraction.
5262 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5263 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
5264 // that should have been artifact combined. Most likely instruction that uses
5265 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5266 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5267 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5268 if (NarrowTy == SrcTy)
5269 return UnableToLegalize;
5270
5271 // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
5272 // is for old mir tests. Since the changes to more/fewer elements it should no
5273 // longer be possible to generate MIR like this when starting from llvm-ir
5274 // because LCMTy approach was replaced with merge/unmerge to vector elements.
5275 if (TypeIdx == 1) {
5276 assert(SrcTy.isVector() && "Expected vector types");
5277 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5278 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5279 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5280 return UnableToLegalize;
5281 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5282 //
5283 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5284 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5285 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5286 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5287 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5288 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5289
     // NOTE(review): the declaration of `Elts` (original line 5290, presumably
     // a SmallVector<Register>) was dropped by the extraction.
5291 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5292 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5293 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5294 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5295 Elts.push_back(Unmerge.getReg(j));
5296 }
5297
5298 SmallVector<Register, 8> NarrowTyElts;
5299 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5300 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5301 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5302 ++i, Offset += NumNarrowTyElts) {
5303 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5304 NarrowTyElts.push_back(
5305 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5306 }
5307
5308 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5309 MI.eraseFromParent();
5310 return Legalized;
5311 }
5312
5313 assert(TypeIdx == 0 && "Bad type index");
5314 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5315 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5316 return UnableToLegalize;
5317
5318 // This is most likely SrcTy (smaller than register size) packed in DstTy
5319 // (larger than register size) and since merge was not combined it will be
5320 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5321 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5322
5323 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5324 //
5325 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5326 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5327 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5328 SmallVector<Register, 8> NarrowTyElts;
5329 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5330 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5331 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
     // NOTE(review): the declaration of `Sources` (original line 5333,
     // presumably a SmallVector<Register>) was dropped by the extraction.
5332 for (unsigned i = 0; i < NumParts; ++i) {
5334 for (unsigned j = 0; j < NumElts; ++j)
5335 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5336 NarrowTyElts.push_back(
5337 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5338 }
5339
5340 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5341 MI.eraseFromParent();
5342 return Legalized;
5343}
5344
5347 unsigned TypeIdx,
5348 LLT NarrowVecTy) {
     // Narrow G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT to operate on a
     // NarrowVecTy-sized piece. Only handled when the index is a known
     // constant. NOTE(review): the signature lines (original 5345-5346) were
     // dropped by the extraction.
5349 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5350 Register InsertVal;
5351 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5352
5353 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5354 if (IsInsert)
5355 InsertVal = MI.getOperand(2).getReg();
5356
5357 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5358 LLT VecTy = MRI.getType(SrcVec);
5359
5360 // If the index is a constant, we can really break this down as you would
5361 // expect, and index into the target size pieces.
5362 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5363 if (MaybeCst) {
5364 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5365 // Avoid out of bounds indexing the pieces.
5366 if (IdxVal >= VecTy.getNumElements()) {
5367 MIRBuilder.buildUndef(DstReg);
5368 MI.eraseFromParent();
5369 return Legalized;
5370 }
5371
     // Scalar narrow type: split to scalars and pick/replace one directly.
5372 if (!NarrowVecTy.isVector()) {
5373 SmallVector<Register, 8> SplitPieces;
5374 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5375 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5376 if (IsInsert) {
5377 SplitPieces[IdxVal] = InsertVal;
5378 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5379 } else {
5380 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5381 }
5382 } else {
5383 SmallVector<Register, 8> VecParts;
5384 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5385
5386 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5387 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5388 TargetOpcode::G_ANYEXT);
5389
5390 unsigned NewNumElts = NarrowVecTy.getNumElements();
5391
     // Map the original index onto (piece number, index within piece).
5392 LLT IdxTy = MRI.getType(Idx);
5393 int64_t PartIdx = IdxVal / NewNumElts;
5394 auto NewIdx =
5395 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5396
5397 if (IsInsert) {
5398 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5399
5400 // Use the adjusted index to insert into one of the subvectors.
5401 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5402 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5403 VecParts[PartIdx] = InsertPart.getReg(0);
5404
5405 // Recombine the inserted subvector with the others to reform the result
5406 // vector.
5407 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5408 } else {
5409 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5410 }
5411 }
5412
5413 MI.eraseFromParent();
5414 return Legalized;
5415 }
5416
5417 // With a variable index, we can't perform the operation in a smaller type, so
5418 // we're forced to expand this.
5419 //
5420 // TODO: We could emit a chain of compare/select to figure out which piece to
5421 // index.
     // NOTE(review): the final statement (original line 5422, presumably
     // `return UnableToLegalize;`) was dropped by the extraction.
5423}
5424
5427 LLT NarrowTy) {
     // Split a non-atomic, non-extending load or store into NarrowTy-sized
     // memory accesses (plus a leftover access for uneven breakdowns),
     // honoring the target's endianness for the piece order.
     // NOTE(review): the signature lines (original 5425-5426) were dropped by
     // the extraction.
5428 // FIXME: Don't know how to handle secondary types yet.
5429 if (TypeIdx != 0)
5430 return UnableToLegalize;
5431
5432 if (!NarrowTy.isByteSized()) {
5433 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5434 return UnableToLegalize;
5435 }
5436
5437 // This implementation doesn't work for atomics. Give up instead of doing
5438 // something invalid.
5439 if (LdStMI.isAtomic())
5440 return UnableToLegalize;
5441
5442 bool IsLoad = isa<GLoad>(LdStMI);
5443 Register ValReg = LdStMI.getReg(0);
5444 Register AddrReg = LdStMI.getPointerReg();
5445 LLT ValTy = MRI.getType(ValReg);
5446
5447 // FIXME: Do we need a distinct NarrowMemory legalize action?
5448 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5449 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5450 return UnableToLegalize;
5451 }
5452
5453 int NumParts = -1;
5454 int NumLeftover = -1;
5455 LLT LeftoverTy;
5456 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5457 if (IsLoad) {
     // Loads only need the breakdown shape; destination registers are
     // created inside splitTypePieces below.
5458 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5459 } else {
     // Stores must split the value register up front.
5460 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5461 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5462 NumParts = NarrowRegs.size();
5463 NumLeftover = NarrowLeftoverRegs.size();
5464 }
5465 }
5466
5467 if (NumParts == -1)
5468 return UnableToLegalize;
5469
5470 LLT PtrTy = MRI.getType(AddrReg);
5471 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5472
5473 unsigned TotalSize = ValTy.getSizeInBits();
5474
5475 // Split the load/store into PartTy sized pieces starting at Offset. If this
5476 // is a load, return the new registers in ValRegs. For a store, each element
5477 // of ValRegs should be PartTy. Returns the next offset that needs to be
5478 // handled.
5479 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5480 auto MMO = LdStMI.getMMO();
5481 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5482 unsigned NumParts, unsigned Offset) -> unsigned {
5483 MachineFunction &MF = MIRBuilder.getMF();
5484 unsigned PartSize = PartTy.getSizeInBits();
5485 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5486 ++Idx) {
5487 unsigned ByteOffset = Offset / 8;
5488 Register NewAddrReg;
5489
5490 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5491 ByteOffset);
5492
     // Derive a memoperand for this piece from the original access.
5493 MachineMemOperand *NewMMO =
5494 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5495
5496 if (IsLoad) {
5497 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5498 ValRegs.push_back(Dst);
5499 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5500 } else {
5501 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5502 }
     // Big-endian walks from the high end of the value downwards.
5503 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5504 }
5505
5506 return Offset;
5507 };
5508
5509 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5510 unsigned HandledOffset =
5511 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5512
5513 // Handle the rest of the register if this isn't an even type breakdown.
5514 if (LeftoverTy.isValid())
5515 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5516
     // For loads, stitch the loaded pieces back into the original wide value.
5517 if (IsLoad) {
5518 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5519 LeftoverTy, NarrowLeftoverRegs);
5520 }
5521
5522 LdStMI.eraseFromParent();
5523 return Legalized;
5524}
5525
// Vector-splitting ("fewer elements") dispatcher: choose how to break the
// given vector instruction into NarrowTy-sized pieces. NumElts is the
// element count of each narrow piece (1 when NarrowTy is scalar, i.e. full
// scalarization).
// NOTE(review): the signature's opening lines precede this excerpt and are
// not visible here.
5528 LLT NarrowTy) {
5529 using namespace TargetOpcode;
5531 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5532
5533 switch (MI.getOpcode()) {
// The long run of opcodes below splits uniformly: every register operand is
// a vector with the same element count, so each is handled chunk-by-chunk.
5534 case G_IMPLICIT_DEF:
5535 case G_TRUNC:
5536 case G_AND:
5537 case G_OR:
5538 case G_XOR:
5539 case G_ADD:
5540 case G_SUB:
5541 case G_MUL:
5542 case G_PTR_ADD:
5543 case G_SMULH:
5544 case G_UMULH:
5545 case G_FADD:
5546 case G_FMUL:
5547 case G_FSUB:
5548 case G_FNEG:
5549 case G_FABS:
5550 case G_FCANONICALIZE:
5551 case G_FDIV:
5552 case G_FREM:
5553 case G_FMA:
5554 case G_FMAD:
5555 case G_FPOW:
5556 case G_FEXP:
5557 case G_FEXP2:
5558 case G_FEXP10:
5559 case G_FLOG:
5560 case G_FLOG2:
5561 case G_FLOG10:
5562 case G_FLDEXP:
5563 case G_FNEARBYINT:
5564 case G_FCEIL:
5565 case G_FFLOOR:
5566 case G_FRINT:
5567 case G_INTRINSIC_LRINT:
5568 case G_INTRINSIC_LLRINT:
5569 case G_INTRINSIC_ROUND:
5570 case G_INTRINSIC_ROUNDEVEN:
5571 case G_LROUND:
5572 case G_LLROUND:
5573 case G_INTRINSIC_TRUNC:
5574 case G_FMODF:
5575 case G_FCOS:
5576 case G_FSIN:
5577 case G_FTAN:
5578 case G_FACOS:
5579 case G_FASIN:
5580 case G_FATAN:
5581 case G_FATAN2:
5582 case G_FCOSH:
5583 case G_FSINH:
5584 case G_FTANH:
5585 case G_FSQRT:
5586 case G_BSWAP:
5587 case G_BITREVERSE:
5588 case G_SDIV:
5589 case G_UDIV:
5590 case G_SREM:
5591 case G_UREM:
5592 case G_SDIVREM:
5593 case G_UDIVREM:
5594 case G_SMIN:
5595 case G_SMAX:
5596 case G_UMIN:
5597 case G_UMAX:
5598 case G_ABS:
5599 case G_FMINNUM:
5600 case G_FMAXNUM:
5601 case G_FMINNUM_IEEE:
5602 case G_FMAXNUM_IEEE:
5603 case G_FMINIMUM:
5604 case G_FMAXIMUM:
5605 case G_FMINIMUMNUM:
5606 case G_FMAXIMUMNUM:
5607 case G_FSHL:
5608 case G_FSHR:
5609 case G_ROTL:
5610 case G_ROTR:
5611 case G_FREEZE:
5612 case G_SADDSAT:
5613 case G_SSUBSAT:
5614 case G_UADDSAT:
5615 case G_USUBSAT:
5616 case G_UMULO:
5617 case G_SMULO:
5618 case G_SHL:
5619 case G_LSHR:
5620 case G_ASHR:
5621 case G_SSHLSAT:
5622 case G_USHLSAT:
5623 case G_CTLZ:
5624 case G_CTLZ_ZERO_UNDEF:
5625 case G_CTTZ:
5626 case G_CTTZ_ZERO_UNDEF:
5627 case G_CTPOP:
5628 case G_CTLS:
5629 case G_FCOPYSIGN:
5630 case G_ZEXT:
5631 case G_SEXT:
5632 case G_ANYEXT:
5633 case G_FPEXT:
5634 case G_FPTRUNC:
5635 case G_SITOFP:
5636 case G_UITOFP:
5637 case G_FPTOSI:
5638 case G_FPTOUI:
5639 case G_FPTOSI_SAT:
5640 case G_FPTOUI_SAT:
5641 case G_INTTOPTR:
5642 case G_PTRTOINT:
5643 case G_ADDRSPACE_CAST:
5644 case G_UADDO:
5645 case G_USUBO:
5646 case G_UADDE:
5647 case G_USUBE:
5648 case G_SADDO:
5649 case G_SSUBO:
5650 case G_SADDE:
5651 case G_SSUBE:
5652 case G_STRICT_FADD:
5653 case G_STRICT_FSUB:
5654 case G_STRICT_FMUL:
5655 case G_STRICT_FMA:
5656 case G_STRICT_FLDEXP:
5657 case G_FFREXP:
5658 case G_TRUNC_SSAT_S:
5659 case G_TRUNC_SSAT_U:
5660 case G_TRUNC_USAT_U:
5661 return fewerElementsVectorMultiEltType(GMI, NumElts);
5662 case G_ICMP:
5663 case G_FCMP:
// Operand 1 is the predicate immediate, not a vector; it is not split.
5664 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5665 case G_IS_FPCLASS:
// Operands 2 and 3 (test mask and FP semantics) are non-vector operands.
5666 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5667 case G_SELECT:
// A vector condition is split along with the values; a scalar condition is
// shared unchanged by every narrow piece.
5668 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5669 return fewerElementsVectorMultiEltType(GMI, NumElts);
5670 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5671 case G_PHI:
5672 return fewerElementsVectorPhi(GMI, NumElts);
5673 case G_UNMERGE_VALUES:
5674 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5675 case G_BUILD_VECTOR:
5676 assert(TypeIdx == 0 && "not a vector type index");
5677 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5678 case G_CONCAT_VECTORS:
5679 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5680 return UnableToLegalize;
5681 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5682 case G_EXTRACT_VECTOR_ELT:
5683 case G_INSERT_VECTOR_ELT:
5684 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5685 case G_LOAD:
5686 case G_STORE:
5687 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5688 case G_SEXT_INREG:
// Operand 2 is the extension-width immediate.
5689 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
// NOTE(review): the case labels for the non-sequential G_VECREDUCE_*
// opcodes appear to be missing from this excerpt — verify against upstream.
5691 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5692 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5693 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5694 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5695 case G_SHUFFLE_VECTOR:
5696 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5697 case G_FPOWI:
// Operand 2 is the scalar integer exponent; it is not split.
5698 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5699 case G_BITCAST:
5700 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5701 case G_INTRINSIC_FPTRUNC_ROUND:
// Operand 2 (rounding mode) is not split.
5702 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5703 default:
5704 return UnableToLegalize;
5705 }
5706}
5707
// Split a G_BITCAST whose destination (TypeIdx 0) was requested as NarrowTy:
// unmerge the source into pieces, bitcast each piece to NarrowTy, and
// re-merge into the original destination.
// NOTE(review): the signature's opening lines precede this excerpt and are
// not visible here.
5710 LLT NarrowTy) {
5711 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5712 "Not a bitcast operation");
5713
5714 if (TypeIdx != 0)
5715 return UnableToLegalize;
5716
5717 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5718
// Number of source scalar elements that fit in one NarrowTy-sized piece.
5719 unsigned NewElemCount =
5720 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5721 SmallVector<Register> SrcVRegs, BitcastVRegs;
5722 if (NewElemCount == 1) {
// Each piece is exactly one source element: a plain unmerge suffices.
5723 LLT SrcNarrowTy = SrcTy.getElementType();
5724
5725 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5726 getUnmergeResults(SrcVRegs, *Unmerge);
5727 } else {
// NOTE(review): the initializer of SrcNarrowTy is on a line missing from
// this excerpt — verify against upstream before editing.
5728 LLT SrcNarrowTy =
5730
5731 // Split the Src and Dst Reg into smaller registers
5732 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5733 return UnableToLegalize;
5734 }
5735
5736 // Build new smaller bitcast instructions
5737 // Not supporting Leftover types for now but will have to
5738 for (Register Reg : SrcVRegs)
5739 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5740
5741 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5742 MI.eraseFromParent();
5743 return Legalized;
5744}
5745
// Split a G_SHUFFLE_VECTOR (TypeIdx 0) into two half-width results (Lo/Hi).
// Each half becomes, depending on which of the four half-inputs its mask
// references: a shuffle of at most two inputs, an undef, a plain copy (for
// single-element halves), or a G_BUILD_VECTOR of hand-extracted elements.
// NOTE(review): the signature's opening line precedes this excerpt and is
// not visible here.
5747 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5748 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5749 if (TypeIdx != 0)
5750 return UnableToLegalize;
5751
5752 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5753 MI.getFirst3RegLLTs();
5754 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5755 // The shuffle should be canonicalized by now.
5756 if (DstTy != Src1Ty)
5757 return UnableToLegalize;
5758 if (DstTy != Src2Ty)
5759 return UnableToLegalize;
5760
5761 if (!isPowerOf2_32(DstTy.getNumElements()))
5762 return UnableToLegalize;
5763
5764 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5765 // Further legalization attempts will be needed to do split further.
5766 NarrowTy =
5767 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5768 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5769
5770 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5771 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5772 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
// The four half-width inputs: lo/hi halves of Src1 then lo/hi halves of Src2.
5773 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5774 SplitSrc2Regs[1]};
5775
5776 Register Hi, Lo;
5777
5778 // If Lo or Hi uses elements from at most two of the four input vectors, then
5779 // express it as a vector shuffle of those two inputs. Otherwise extract the
5780 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
// NOTE(review): the declaration of Ops (the accumulated half-mask) is on a
// line missing from this excerpt.
5782 for (unsigned High = 0; High < 2; ++High) {
5783 Register &Output = High ? Hi : Lo;
5784
5785 // Build a shuffle mask for the output, discovering on the fly which
5786 // input vectors to use as shuffle operands (recorded in InputUsed).
5787 // If building a suitable shuffle vector proves too hard, then bail
5788 // out with useBuildVector set.
5789 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5790 unsigned FirstMaskIdx = High * NewElts;
5791 bool UseBuildVector = false;
5792 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5793 // The mask element. This indexes into the input.
5794 int Idx = Mask[FirstMaskIdx + MaskOffset];
5795
5796 // The input vector this mask element indexes into.
5797 unsigned Input = (unsigned)Idx / NewElts;
5798
5799 if (Input >= std::size(Inputs)) {
5800 // The mask element does not index into any input vector.
5801 Ops.push_back(-1);
5802 continue;
5803 }
5804
5805 // Turn the index into an offset from the start of the input vector.
5806 Idx -= Input * NewElts;
5807
5808 // Find or create a shuffle vector operand to hold this input.
5809 unsigned OpNo;
5810 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5811 if (InputUsed[OpNo] == Input) {
5812 // This input vector is already an operand.
5813 break;
5814 } else if (InputUsed[OpNo] == -1U) {
5815 // Create a new operand for this input vector.
5816 InputUsed[OpNo] = Input;
5817 break;
5818 }
5819 }
5820
5821 if (OpNo >= std::size(InputUsed)) {
5822 // More than two input vectors used! Give up on trying to create a
5823 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5824 UseBuildVector = true;
5825 break;
5826 }
5827
5828 // Add the mask index for the new shuffle vector.
5829 Ops.push_back(Idx + OpNo * NewElts);
5830 }
5831
5832 if (UseBuildVector) {
5833 LLT EltTy = NarrowTy.getElementType();
// NOTE(review): the declaration of SVOps (the element accumulator) is on a
// line missing from this excerpt.
5835
5836 // Extract the input elements by hand.
5837 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5838 // The mask element. This indexes into the input.
5839 int Idx = Mask[FirstMaskIdx + MaskOffset];
5840
5841 // The input vector this mask element indexes into.
5842 unsigned Input = (unsigned)Idx / NewElts;
5843
5844 if (Input >= std::size(Inputs)) {
5845 // The mask element is "undef" or indexes off the end of the input.
5846 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5847 continue;
5848 }
5849
5850 // Turn the index into an offset from the start of the input vector.
5851 Idx -= Input * NewElts;
5852
5853 // Extract the vector element by hand.
5854 SVOps.push_back(MIRBuilder
5855 .buildExtractVectorElement(
5856 EltTy, Inputs[Input],
5857 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5858 .getReg(0));
5859 }
5860
5861 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5862 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5863 } else if (InputUsed[0] == -1U) {
5864 // No input vectors were used! The result is undefined.
5865 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5866 } else if (NewElts == 1) {
// Single-element half: a one-element shuffle is just a copy of the input.
5867 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5868 } else {
5869 Register Op0 = Inputs[InputUsed[0]];
5870 // If only one input was used, use an undefined vector for the other.
5871 Register Op1 = InputUsed[1] == -1U
5872 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5873 : Inputs[InputUsed[1]];
5874 // At least one input vector was used. Create a new shuffle vector.
5875 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5876 }
5877
// Reset the mask accumulator before processing the Hi half.
5878 Ops.clear();
5879 }
5880
5881 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5882 MI.eraseFromParent();
5883 return Legalized;
5884}
5885
// Split a non-sequential vector reduction on its source vector (TypeIdx 1).
// These reductions may be freely re-associated, so the source is reduced
// piecewise and the partial results are combined with the scalar opcode.
// NOTE(review): the signature's opening line precedes this excerpt and is
// not visible here.
5887 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5888 auto &RdxMI = cast<GVecReduce>(MI);
5889
5890 if (TypeIdx != 1)
5891 return UnableToLegalize;
5892
5893 // The semantics of the normal non-sequential reductions allow us to freely
5894 // re-associate the operation.
5895 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5896
// NarrowTy must evenly divide the source element count.
5897 if (NarrowTy.isVector() &&
5898 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5899 return UnableToLegalize;
5900
5901 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5902 SmallVector<Register> SplitSrcs;
5903 // If NarrowTy is a scalar then we're being asked to scalarize.
5904 const unsigned NumParts =
5905 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5906 : SrcTy.getNumElements();
5907
5908 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5909 if (NarrowTy.isScalar()) {
5910 if (DstTy != NarrowTy)
5911 return UnableToLegalize; // FIXME: handle implicit extensions.
5912
5913 if (isPowerOf2_32(NumParts)) {
5914 // Generate a tree of scalar operations to reduce the critical path.
5915 SmallVector<Register> PartialResults;
5916 unsigned NumPartsLeft = NumParts;
5917 while (NumPartsLeft > 1) {
// Pairwise-combine adjacent values; each pass halves the worklist.
5918 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5919 PartialResults.emplace_back(
// NOTE(review): the MIRBuilder receiver of this buildInstr chain is on a
// line missing from this excerpt.
5921 .buildInstr(ScalarOpc, {NarrowTy},
5922 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5923 .getReg(0));
5924 }
5925 SplitSrcs = PartialResults;
5926 PartialResults.clear();
5927 NumPartsLeft = SplitSrcs.size();
5928 }
5929 assert(SplitSrcs.size() == 1);
5930 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5931 MI.eraseFromParent();
5932 return Legalized;
5933 }
5934 // If we can't generate a tree, then just do sequential operations.
5935 Register Acc = SplitSrcs[0];
5936 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5937 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5938 .getReg(0);
5939 MIRBuilder.buildCopy(DstReg, Acc);
5940 MI.eraseFromParent();
5941 return Legalized;
5942 }
// NarrowTy is a vector: reduce each narrow piece with the original reduction
// opcode, then combine the scalar partial results below.
5943 SmallVector<Register> PartialReductions;
5944 for (unsigned Part = 0; Part < NumParts; ++Part) {
5945 PartialReductions.push_back(
5946 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5947 .getReg(0));
5948 }
5949
5950 // If the types involved are powers of 2, we can generate intermediate vector
5951 // ops, before generating a final reduction operation.
5952 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5953 isPowerOf2_32(NarrowTy.getNumElements())) {
5954 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5955 }
5956
5957 Register Acc = PartialReductions[0];
5958 for (unsigned Part = 1; Part < NumParts; ++Part) {
5959 if (Part == NumParts - 1) {
// Last combine writes directly into the destination register.
5960 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5961 {Acc, PartialReductions[Part]});
5962 } else {
5963 Acc = MIRBuilder
5964 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5965 .getReg(0);
5966 }
5967 }
5968 MI.eraseFromParent();
5969 return Legalized;
5970}
5971
// Scalarize a sequential (in-order) FP reduction: G_VECREDUCE_SEQ_FADD/FMUL
// must preserve evaluation order, so the source elements are folded one at a
// time into the start-value accumulator.
// NOTE(review): the signature's opening lines precede this excerpt and are
// not visible here.
5974 unsigned int TypeIdx,
5975 LLT NarrowTy) {
5976 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5977 MI.getFirst3RegLLTs();
// Only full scalarization of the source (TypeIdx 2) is supported, and the
// destination/start-value types must already equal the narrow scalar type.
5978 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5979 DstTy != NarrowTy)
5980 return UnableToLegalize;
5981
5982 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5983 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5984 "Unexpected vecreduce opcode");
5985 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5986 ? TargetOpcode::G_FADD
5987 : TargetOpcode::G_FMUL;
5988
5989 SmallVector<Register> SplitSrcs;
5990 unsigned NumParts = SrcTy.getNumElements();
5991 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5992 Register Acc = ScalarReg;
// Strictly in-order accumulation: Acc = Acc <op> element[i].
5993 for (unsigned i = 0; i < NumParts; i++)
5994 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5995 .getReg(0);
5996
5997 MIRBuilder.buildCopy(DstReg, Acc);
5998 MI.eraseFromParent();
5999 return Legalized;
6000}
6001
// Narrow a power-of-two reduction by tree-combining NarrowTy-sized vector
// pieces with the element-wise scalar opcode until a single NarrowTy value
// remains, then retarget the original reduction instruction at it.
// NOTE(review): the return type on the preceding line is not visible here.
6003LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
6004 LLT SrcTy, LLT NarrowTy,
6005 unsigned ScalarOpc) {
6006 SmallVector<Register> SplitSrcs;
6007 // Split the sources into NarrowTy size pieces.
6008 extractParts(SrcReg, NarrowTy,
6009 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
6010 MIRBuilder, MRI);
6011 // We're going to do a tree reduction using vector operations until we have
6012 // one NarrowTy size value left.
6013 while (SplitSrcs.size() > 1) {
6014 SmallVector<Register> PartialRdxs;
// Pairwise-combine adjacent pieces; each pass halves the worklist.
6015 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
6016 Register LHS = SplitSrcs[Idx];
6017 Register RHS = SplitSrcs[Idx + 1];
6018 // Create the intermediate vector op.
6019 Register Res =
6020 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
6021 PartialRdxs.push_back(Res);
6022 }
6023 SplitSrcs = std::move(PartialRdxs);
6024 }
6025 // Finally generate the requested NarrowTy based reduction.
// Reuse the original reduction instruction in place, only narrowing its
// source operand; notify the observer of the mutation.
6026 Observer.changingInstr(MI);
6027 MI.getOperand(1).setReg(SplitSrcs[0]);
6028 Observer.changedInstr(MI);
6029 return Legalized;
6030}
6031
// Expand a wide scalar shift (G_SHL/G_LSHR/G_ASHR) by a *constant* amount
// into operations on two half-width registers (InL/InH), choosing among the
// cases: amount > full width, amount > half width, amount == half width, and
// amount < half width (which needs cross-half carry bits).
// NOTE(review): the signature's opening lines precede this excerpt and are
// not visible here.
6034 const LLT HalfTy, const LLT AmtTy) {
6035
6036 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6037 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6038 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6039
// Shift by zero: re-merge the halves unchanged.
6040 if (Amt.isZero()) {
6041 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6042 MI.eraseFromParent();
6043 return Legalized;
6044 }
6045
6046 LLT NVT = HalfTy;
6047 unsigned NVTBits = HalfTy.getSizeInBits();
6048 unsigned VTBits = 2 * NVTBits;
6049
6050 SrcOp Lo(Register(0)), Hi(Register(0));
6051 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6052 if (Amt.ugt(VTBits)) {
// Everything is shifted out: both halves become zero.
6053 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6054 } else if (Amt.ugt(NVTBits)) {
6055 Lo = MIRBuilder.buildConstant(NVT, 0);
6056 Hi = MIRBuilder.buildShl(NVT, InL,
6057 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6058 } else if (Amt == NVTBits) {
// Exactly one half: low half moves wholesale into the high half.
6059 Lo = MIRBuilder.buildConstant(NVT, 0);
6060 Hi = InL;
6061 } else {
6062 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
// Hi combines InH shifted left with the bits carried out of the top of InL.
6063 auto OrLHS =
6064 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6065 auto OrRHS = MIRBuilder.buildLShr(
6066 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6067 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6068 }
6069 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6070 if (Amt.ugt(VTBits)) {
6071 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6072 } else if (Amt.ugt(NVTBits)) {
6073 Lo = MIRBuilder.buildLShr(NVT, InH,
6074 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6075 Hi = MIRBuilder.buildConstant(NVT, 0);
6076 } else if (Amt == NVTBits) {
6077 Lo = InH;
6078 Hi = MIRBuilder.buildConstant(NVT, 0);
6079 } else {
6080 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6081
// Lo combines InL shifted right with the bits carried in from the bottom of
// InH.
6082 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6083 auto OrRHS = MIRBuilder.buildShl(
6084 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6085
6086 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6087 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6088 }
6089 } else {
// G_ASHR: like G_LSHR, but vacated positions are filled with the sign of
// the high half (AShr of InH by NVTBits-1 replicates the sign bit).
6090 if (Amt.ugt(VTBits)) {
6091 Hi = Lo = MIRBuilder.buildAShr(
6092 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6093 } else if (Amt.ugt(NVTBits)) {
6094 Lo = MIRBuilder.buildAShr(NVT, InH,
6095 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6096 Hi = MIRBuilder.buildAShr(NVT, InH,
6097 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6098 } else if (Amt == NVTBits) {
6099 Lo = InH;
6100 Hi = MIRBuilder.buildAShr(NVT, InH,
6101 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6102 } else {
6103 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6104
6105 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6106 auto OrRHS = MIRBuilder.buildShl(
6107 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6108
6109 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6110 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6111 }
6112 }
6113
6114 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6115 MI.eraseFromParent();
6116
6117 return Legalized;
6118}
6119
// Narrow a wide scalar shift. TypeIdx 1 narrows only the shift-amount
// operand; TypeIdx 0 splits the value either multi-way (when many parts are
// needed) or recursively into halves, handling constant and fully-variable
// shift amounts.
// NOTE(review): the signature's opening lines precede this excerpt and are
// not visible here.
6122 LLT RequestedTy) {
6123 if (TypeIdx == 1) {
// Only the amount operand (operand 2) is narrowed; the value is untouched.
6124 Observer.changingInstr(MI);
6125 narrowScalarSrc(MI, RequestedTy, 2);
6126 Observer.changedInstr(MI);
6127 return Legalized;
6128 }
6129
6130 Register DstReg = MI.getOperand(0).getReg();
6131 LLT DstTy = MRI.getType(DstReg);
6132 if (DstTy.isVector())
6133 return UnableToLegalize;
6134
6135 Register Amt = MI.getOperand(2).getReg();
6136 LLT ShiftAmtTy = MRI.getType(Amt);
6137 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
// Binary splitting below requires an even bit width.
6138 if (DstEltSize % 2 != 0)
6139 return UnableToLegalize;
6140
6141 // Check if we should use multi-way splitting instead of recursive binary
6142 // splitting.
6143 //
6144 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6145 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6146 // and dependency chains created by usual binary splitting approach
6147 // (128->64->32).
6148 //
6149 // The >= 8 parts threshold ensures we only use this optimization when binary
6150 // splitting would require multiple recursive passes, avoiding overhead for
6151 // simple 2-way splits where binary approach is sufficient.
6152 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6153 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6154 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6155 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6156 // steps).
6157 if (NumParts >= 8)
6158 return narrowScalarShiftMultiway(MI, RequestedTy);
6159 }
6160
6161 // Fall back to binary splitting:
6162 // Ignore the input type. We can only go to exactly half the size of the
6163 // input. If that isn't small enough, the resulting pieces will be further
6164 // legalized.
6165 const unsigned NewBitSize = DstEltSize / 2;
6166 const LLT HalfTy = LLT::scalar(NewBitSize);
6167 const LLT CondTy = LLT::scalar(1);
6168
// Constant shift amounts take the cheaper dedicated expansion.
6169 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6170 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6171 ShiftAmtTy);
6172 }
6173
6174 // TODO: Expand with known bits.
6175
6176 // Handle the fully general expansion by an unknown amount.
6177 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6178
6179 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6180 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6181 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6182
// AmtExcess = Amt - NewBitSize (meaningful when Amt >= half width);
// AmtLack = NewBitSize - Amt (amount for the cross-half carry bits).
6183 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6184 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6185
6186 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6187 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6188 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6189
6190 Register ResultRegs[2];
6191 switch (MI.getOpcode()) {
6192 case TargetOpcode::G_SHL: {
6193 // Short: ShAmt < NewBitSize
6194 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6195
6196 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6197 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6198 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6199
6200 // Long: ShAmt >= NewBitSize
6201 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6202 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6203
// Select between short/long variants; the extra IsZero select presumably
// guards the Amt == 0 case where AmtLack equals the half width (an
// out-of-range shift) — TODO(review) confirm against upstream.
6204 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6205 auto Hi = MIRBuilder.buildSelect(
6206 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6207
6208 ResultRegs[0] = Lo.getReg(0);
6209 ResultRegs[1] = Hi.getReg(0);
6210 break;
6211 }
6212 case TargetOpcode::G_LSHR:
6213 case TargetOpcode::G_ASHR: {
6214 // Short: ShAmt < NewBitSize
6215 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6216
6217 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6218 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6219 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6220
6221 // Long: ShAmt >= NewBitSize
// NOTE(review): the declaration of HiL is on a line missing from this
// excerpt.
6223 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6224 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6225 } else {
6226 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6227 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6228 }
6229 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6230 {InH, AmtExcess}); // Lo from Hi part.
6231
6232 auto Lo = MIRBuilder.buildSelect(
6233 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6234
6235 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6236
6237 ResultRegs[0] = Lo.getReg(0);
6238 ResultRegs[1] = Hi.getReg(0);
6239 break;
6240 }
6241 default:
6242 llvm_unreachable("not a shift");
6243 }
6244
6245 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6246 MI.eraseFromParent();
6247 return Legalized;
6248}
6249
// Compute output part PartIdx of a multi-way-narrowed shift whose amount is
// a known constant, already decomposed into Params.WordShift (whole parts)
// and Params.BitShift (in-part bits). Returns the register holding the part.
// NOTE(review): the return type and first parameter are on preceding lines
// not visible here.
6251 unsigned PartIdx,
6252 unsigned NumParts,
6253 ArrayRef<Register> SrcParts,
6254 const ShiftParams &Params,
6255 LLT TargetTy, LLT ShiftAmtTy) {
// Params.WordShift/BitShift were built as constant registers; recover the
// immediate values for compile-time part selection.
6256 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6257 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6258 assert(WordShiftConst && BitShiftConst && "Expected constants");
6259
6260 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6261 const unsigned ShiftBits = BitShiftConst->getZExtValue();
// Word-aligned shifts (BitShift == 0) just move whole parts — no OR-ing of
// carry bits is required.
6262 const bool NeedsInterWordShift = ShiftBits != 0;
6263
6264 switch (Opcode) {
6265 case TargetOpcode::G_SHL: {
6266 // Data moves from lower indices to higher indices
6267 // If this part would come from a source beyond our range, it's zero
6268 if (PartIdx < ShiftWords)
6269 return Params.Zero;
6270
6271 unsigned SrcIdx = PartIdx - ShiftWords;
6272 if (!NeedsInterWordShift)
6273 return SrcParts[SrcIdx];
6274
6275 // Combine shifted main part with carry from previous part
6276 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6277 if (SrcIdx > 0) {
6278 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6279 Params.InvBitShift);
6280 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6281 }
6282 return Hi.getReg(0);
6283 }
6284
6285 case TargetOpcode::G_LSHR: {
// Data moves from higher indices to lower indices; out-of-range reads are
// zero.
6286 unsigned SrcIdx = PartIdx + ShiftWords;
6287 if (SrcIdx >= NumParts)
6288 return Params.Zero;
6289 if (!NeedsInterWordShift)
6290 return SrcParts[SrcIdx];
6291
6292 // Combine shifted main part with carry from next part
6293 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6294 if (SrcIdx + 1 < NumParts) {
6295 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6296 Params.InvBitShift);
6297 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6298 }
6299 return Lo.getReg(0);
6300 }
6301
6302 case TargetOpcode::G_ASHR: {
6303 // Like LSHR but preserves sign bit
6304 unsigned SrcIdx = PartIdx + ShiftWords;
6305 if (SrcIdx >= NumParts)
6306 return Params.SignBit;
6307 if (!NeedsInterWordShift)
6308 return SrcParts[SrcIdx];
6309
6310 // Only the original MSB part uses arithmetic shift to preserve sign. All
6311 // other parts use logical shift since they're just moving data bits.
6312 auto Lo =
6313 (SrcIdx == NumParts - 1)
6314 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6315 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
// Parts beyond the top of the source use the replicated-sign word as the
// carry source.
6316 Register HiSrc =
6317 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6318 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6319 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6320 }
6321
6322 default:
6323 llvm_unreachable("not a shift");
6324 }
6325}
6326
// Build one output part of a multi-way shift by a runtime-variable amount:
// shift MainOperand by ShiftAmt and OR in the carry bits extracted from
// CarryOperand (when valid), guarding the ShiftAmt == 0 case where the
// inverse carry-shift amount would be out of range.
// NOTE(review): the return type and first parameter are on preceding lines
// not visible here.
6328 Register MainOperand,
6329 Register ShiftAmt,
6330 LLT TargetTy,
6331 Register CarryOperand) {
6332 // This helper generates a single output part for variable shifts by combining
6333 // the main operand (shifted by BitShift) with carry bits from an adjacent
6334 // part.
6335
6336 // For G_ASHR, individual parts don't have their own sign bit, only the
6337 // complete value does. So we use LSHR for the main operand shift in ASHR
6338 // context.
6339 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6340 ? static_cast<unsigned>(TargetOpcode::G_LSHR)
6341 : Opcode;
6342
6343 // Perform the primary shift on the main operand
6344 Register MainShifted =
6345 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6346 .getReg(0);
6347
6348 // No carry operand available
6349 if (!CarryOperand.isValid())
6350 return MainShifted;
6351
6352 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6353 // so carry bits aren't needed.
6354 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6355 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6356 LLT BoolTy = LLT::scalar(1);
6357 auto IsZeroBitShift =
6358 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6359
6360 // Extract bits from the adjacent part that will "carry over" into this part.
6361 // The carry direction is opposite to the main shift direction, so we can
6362 // align the two shifted values before combining them with OR.
6363
6364 // Determine the carry shift opcode (opposite direction)
6365 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6366 : TargetOpcode::G_SHL;
6367
6368 // Calculate inverse shift amount: BitWidth - ShiftAmt
6369 auto TargetBitsConst =
6370 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6371 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6372
6373 // Shift the carry operand
// NOTE(review): the MIRBuilder receiver of this buildInstr chain is on a
// line missing from this excerpt.
6374 Register CarryBits =
6376 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6377 .getReg(0);
6378
6379 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6380 // TargetBits which would be poison for the individual carry shift operation).
6381 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6382 Register SafeCarryBits =
6383 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6384 .getReg(0);
6385
6386 // Combine the main shifted part with the carry bits
6387 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6388}
6389
// Multi-way narrowing of a wide shift whose amount is a compile-time
// constant: split the source into TargetTy-sized parts, decompose the amount
// into whole-part and in-part components, and compute each destination part
// directly via buildConstantShiftPart.
// NOTE(review): the signature's opening lines precede this excerpt and are
// not visible here.
6392 const APInt &Amt,
6393 LLT TargetTy,
6394 LLT ShiftAmtTy) {
6395 // Any wide shift can be decomposed into WordShift + BitShift components.
6396 // When shift amount is known constant, directly compute the decomposition
6397 // values and generate constant registers.
6398 Register DstReg = MI.getOperand(0).getReg();
6399 Register SrcReg = MI.getOperand(1).getReg();
6400 LLT DstTy = MRI.getType(DstReg);
6401
6402 const unsigned DstBits = DstTy.getScalarSizeInBits();
6403 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6404 const unsigned NumParts = DstBits / TargetBits;
6405
6406 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6407
6408 // When the shift amount is known at compile time, we just calculate which
6409 // source parts contribute to each output part.
6410
6411 SmallVector<Register, 8> SrcParts;
6412 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6413
6414 if (Amt.isZero()) {
6415 // No shift needed, just copy
6416 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6417 MI.eraseFromParent();
6418 return Legalized;
6419 }
6420
6421 ShiftParams Params;
// Decompose: ShiftWords moves whole parts, ShiftBits shifts within a part.
6422 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6423 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6424
6425 // Generate constants and values needed by all shift types
6426 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6427 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6428 Params.InvBitShift =
6429 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6430 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6431
6432 // For ASHR, we need the sign-extended value to fill shifted-out positions
// NOTE(review): the MIRBuilder receiver of this buildAShr chain is on a
// line missing from this excerpt.
6433 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6434 Params.SignBit =
6436 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6437 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6438 .getReg(0);
6439
6440 SmallVector<Register, 8> DstParts(NumParts);
6441 for (unsigned I = 0; I < NumParts; ++I)
6442 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6443 Params, TargetTy, ShiftAmtTy);
6444
6445 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6446 MI.eraseFromParent();
6447 return Legalized;
6448}
6449
// Narrow a wide scalar shift (G_SHL / G_LSHR / G_ASHR) into NumParts
// TargetTy-sized pieces. A constant shift amount is dispatched to the direct
// constant lowering; a runtime-variable amount is lowered to a branchless
// chain of selects over every possible word-shift value.
// NOTE(review): the function signature (extraction lines 6450-6451) and two
// "MIRBuilder" continuation lines (6586, 6593) are missing from this
// extraction; the body below is kept byte-identical to what is visible.
6452 Register DstReg = MI.getOperand(0).getReg();
6453 Register SrcReg = MI.getOperand(1).getReg();
6454 Register AmtReg = MI.getOperand(2).getReg();
6455 LLT DstTy = MRI.getType(DstReg);
6456 LLT ShiftAmtTy = MRI.getType(AmtReg);
6457
6458 const unsigned DstBits = DstTy.getScalarSizeInBits();
6459 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6460 const unsigned NumParts = DstBits / TargetBits;
6461
6462 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6463 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6464
6465 // If the shift amount is known at compile time, we can use direct indexing
6466 // instead of generating select chains in the general case.
6467 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6468 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6469 ShiftAmtTy);
6470
6471 // For runtime-variable shift amounts, we must generate a more complex
6472 // sequence that handles all possible shift values using select chains.
6473
6474 // Split the input into target-sized pieces
6475 SmallVector<Register, 8> SrcParts;
6476 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6477
6478 // Shifting by zero should be a no-op.
6479 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6480 LLT BoolTy = LLT::scalar(1);
6481 auto IsZeroShift =
6482 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6483
6484 // Any wide shift can be decomposed into two components:
6485 // 1. WordShift: number of complete target-sized words to shift
6486 // 2. BitShift: number of bits to shift within each word
6487 //
6488 // Example: 128-bit >> 50 with 32-bit target:
6489 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6490 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6491 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6492 auto TargetBitsLog2Const =
6493 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6494 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6495
// TargetBits is asserted to be a power of two above, so division/modulo by it
// can be done with a logical shift right and a mask.
6496 Register WordShift =
6497 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6498 Register BitShift =
6499 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6500
6501 // Fill values:
6502 // - SHL/LSHR: fill with zeros
6503 // - ASHR: fill with sign-extended MSB
6504 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6505
6506 Register FillValue;
6507 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6508 auto TargetBitsMinusOneConst =
6509 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
// Arithmetic shift of the most-significant part by TargetBits-1 replicates
// the sign bit across a whole part.
6510 FillValue = MIRBuilder
6511 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6512 TargetBitsMinusOneConst)
6513 .getReg(0);
6514 } else {
6515 FillValue = ZeroReg;
6516 }
6517
6518 SmallVector<Register, 8> DstParts(NumParts);
6519
6520 // For each output part, generate a select chain that chooses the correct
6521 // result based on the runtime WordShift value. This handles all possible
6522 // word shift amounts by pre-calculating what each would produce.
6523 for (unsigned I = 0; I < NumParts; ++I) {
6524 // Initialize with appropriate default value for this shift type
6525 Register InBoundsResult = FillValue;
6526
6527 // clang-format off
6528 // Build a branchless select chain by pre-computing results for all possible
6529 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6530 //
6531 // K=0: select(WordShift==0, result0, FillValue)
6532 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6533 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6534 // clang-format on
6535 for (unsigned K = 0; K < NumParts; ++K) {
6536 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6537 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6538 WordShift, WordShiftKConst);
6539
6540 // Calculate source indices for this word shift
6541 //
6542 // For 4-part 128-bit value with K=1 word shift:
6543 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6544 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6545 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6546 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6547 int MainSrcIdx;
6548 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6549
6550 switch (MI.getOpcode()) {
6551 case TargetOpcode::G_SHL:
6552 MainSrcIdx = (int)I - (int)K;
6553 CarrySrcIdx = MainSrcIdx - 1;
6554 break;
6555 case TargetOpcode::G_LSHR:
6556 case TargetOpcode::G_ASHR:
6557 MainSrcIdx = (int)I + (int)K;
6558 CarrySrcIdx = MainSrcIdx + 1;
6559 break;
6560 default:
6561 llvm_unreachable("Not a shift");
6562 }
6563
6564 // Check bounds and build the result for this word shift
6565 Register ResultForK;
6566 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6567 Register MainOp = SrcParts[MainSrcIdx];
6568 Register CarryOp;
6569
6570 // Determine carry operand with bounds checking
6571 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6572 CarryOp = SrcParts[CarrySrcIdx];
6573 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6574 CarrySrcIdx >= (int)NumParts)
6575 CarryOp = FillValue; // Use sign extension
6576
// NOTE(review): for SHL/LSHR with an out-of-range CarrySrcIdx, CarryOp is
// left as a null Register here; presumably buildVariableShiftPart treats
// that as "no carry word" — confirm against its definition.
6577 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6578 TargetTy, CarryOp);
6579 } else {
6580 // Out of bounds - use fill value for this k
6581 ResultForK = FillValue;
6582 }
6583
6584 // Select this result if WordShift equals k
6585 InBoundsResult =
6587 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6588 .getReg(0)
6589 }
6590
6591 // Handle zero-shift special case: if shift is 0, use original input
6592 DstParts[I] =
6594 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6595 .getReg(0);
6596 }
6597
6598 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6599 MI.eraseFromParent();
6600 return Legalized;
6601}
6602
// Widen the vector operands of a G_PHI to MoreTy. Each incoming value is
// widened in its predecessor block (insertion point set before that block's
// terminator), and the def is widened at the start of the PHI's own block,
// after the last PHI. Only TypeIdx 0 is supported (asserted below).
// NOTE(review): the signature line(s) preceding "LLT MoreTy) {" are missing
// from this extraction; this is called as moreElementsVectorPhi(MI, TypeIdx,
// MoreTy) elsewhere in this chunk.
6605 LLT MoreTy) {
6606 assert(TypeIdx == 0 && "Expecting only Idx 0");
6607
6608 Observer.changingInstr(MI);
// PHI operands come in (value, block) pairs starting at operand 1, hence the
// stride-2 walk.
6609 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6610 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6611 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6612 moreElementsVectorSrc(MI, MoreTy, I);
6613 }
6614
// The destination must be rewritten after all PHIs in the block, so insert
// just before the first non-PHI instruction.
6615 MachineBasicBlock &MBB = *MI.getParent();
6616 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6617 moreElementsVectorDst(MI, MoreTy, 0);
6618 Observer.changedInstr(MI);
6619 return Legalized;
6620}
6621
// Return the identity (neutral) element for a vector-reduction opcode, as a
// constant of scalar type \p Ty: the value that can pad extra lanes without
// changing the reduction result (0 for ADD/OR/XOR/UMAX, 1 for MUL, all-ones
// for AND/UMIN, signed-min for SMAX, signed-max for SMIN, -0.0 for FADD,
// 1.0 for FMUL). FMINIMUM/FMAXIMUM are explicitly unimplemented.
// NOTE(review): continuation lines 6628, 6639, 6642 and 6645 (the
// llvm_unreachable / "return MIRBuilder.buildConstant(" openers) are missing
// from this extraction; remaining code is kept byte-identical.
6622MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6623 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6624 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6625
6626 switch (Opcode) {
6627 default:
6629 "getNeutralElementForVecReduce called with invalid opcode!");
6630 case TargetOpcode::G_VECREDUCE_ADD:
6631 case TargetOpcode::G_VECREDUCE_OR:
6632 case TargetOpcode::G_VECREDUCE_XOR:
6633 case TargetOpcode::G_VECREDUCE_UMAX:
6634 return MIRBuilder.buildConstant(Ty, 0);
6635 case TargetOpcode::G_VECREDUCE_MUL:
6636 return MIRBuilder.buildConstant(Ty, 1);
6637 case TargetOpcode::G_VECREDUCE_AND:
6638 case TargetOpcode::G_VECREDUCE_UMIN:
6640 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6641 case TargetOpcode::G_VECREDUCE_SMAX:
6643 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6644 case TargetOpcode::G_VECREDUCE_SMIN:
6646 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6647 case TargetOpcode::G_VECREDUCE_FADD:
// -0.0 (not +0.0) is the FADD identity: +0.0 + -0.0 == +0.0.
6648 return MIRBuilder.buildFConstant(Ty, -0.0);
6649 case TargetOpcode::G_VECREDUCE_FMUL:
6650 return MIRBuilder.buildFConstant(Ty, 1.0);
6651 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6652 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6653 assert(false && "getNeutralElementForVecReduce unimplemented for "
6654 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6655 }
6656 llvm_unreachable("switch expected to return!");
6657}
6658
// Legalize \p MI by widening vector operand(s)/result(s) to MoreTy (padding
// with extra elements). Dispatches per opcode: most cases widen sources via
// moreElementsVectorSrc and the def via moreElementsVectorDst inside a
// changingInstr/changedInstr pair; PHI and shuffle have dedicated helpers;
// reductions pad the input with the opcode's neutral element so the result
// is unchanged. Returns UnableToLegalize for unsupported opcode/TypeIdx
// combinations.
// NOTE(review): the signature lines preceding "LLT MoreTy) {" and line 6811
// (the declaration of `Elts` in the G_BUILD_VECTOR case) are missing from
// this extraction; the body below is kept byte-identical.
6661 LLT MoreTy) {
6662 unsigned Opc = MI.getOpcode();
6663 switch (Opc) {
6664 case TargetOpcode::G_IMPLICIT_DEF:
6665 case TargetOpcode::G_LOAD: {
6666 if (TypeIdx != 0)
6667 return UnableToLegalize;
6668 Observer.changingInstr(MI);
6669 moreElementsVectorDst(MI, MoreTy, 0);
6670 Observer.changedInstr(MI);
6671 return Legalized;
6672 }
6673 case TargetOpcode::G_STORE:
6674 if (TypeIdx != 0)
6675 return UnableToLegalize;
6676 Observer.changingInstr(MI);
6677 moreElementsVectorSrc(MI, MoreTy, 0);
6678 Observer.changedInstr(MI);
6679 return Legalized;
// Binary ops: widen both sources and the destination uniformly.
6680 case TargetOpcode::G_AND:
6681 case TargetOpcode::G_OR:
6682 case TargetOpcode::G_XOR:
6683 case TargetOpcode::G_ADD:
6684 case TargetOpcode::G_SUB:
6685 case TargetOpcode::G_MUL:
6686 case TargetOpcode::G_FADD:
6687 case TargetOpcode::G_FSUB:
6688 case TargetOpcode::G_FMUL:
6689 case TargetOpcode::G_FDIV:
6690 case TargetOpcode::G_FCOPYSIGN:
6691 case TargetOpcode::G_UADDSAT:
6692 case TargetOpcode::G_USUBSAT:
6693 case TargetOpcode::G_SADDSAT:
6694 case TargetOpcode::G_SSUBSAT:
6695 case TargetOpcode::G_SMIN:
6696 case TargetOpcode::G_SMAX:
6697 case TargetOpcode::G_UMIN:
6698 case TargetOpcode::G_UMAX:
6699 case TargetOpcode::G_FMINNUM:
6700 case TargetOpcode::G_FMAXNUM:
6701 case TargetOpcode::G_FMINNUM_IEEE:
6702 case TargetOpcode::G_FMAXNUM_IEEE:
6703 case TargetOpcode::G_FMINIMUM:
6704 case TargetOpcode::G_FMAXIMUM:
6705 case TargetOpcode::G_FMINIMUMNUM:
6706 case TargetOpcode::G_FMAXIMUMNUM:
6707 case TargetOpcode::G_STRICT_FADD:
6708 case TargetOpcode::G_STRICT_FSUB:
6709 case TargetOpcode::G_STRICT_FMUL: {
6710 Observer.changingInstr(MI);
6711 moreElementsVectorSrc(MI, MoreTy, 1);
6712 moreElementsVectorSrc(MI, MoreTy, 2);
6713 moreElementsVectorDst(MI, MoreTy, 0);
6714 Observer.changedInstr(MI);
6715 return Legalized;
6716 }
6717 case TargetOpcode::G_SHL:
6718 case TargetOpcode::G_ASHR:
6719 case TargetOpcode::G_LSHR: {
6720 Observer.changingInstr(MI);
6721 moreElementsVectorSrc(MI, MoreTy, 1);
6722 // The shift operand may have a different scalar type from the source and
6723 // destination operands.
6724 LLT ShiftMoreTy = MoreTy.changeElementType(
6725 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6726 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6727 moreElementsVectorDst(MI, MoreTy, 0);
6728 Observer.changedInstr(MI);
6729 return Legalized;
6730 }
// Ternary ops: widen all three sources and the destination.
6731 case TargetOpcode::G_FMA:
6732 case TargetOpcode::G_STRICT_FMA:
6733 case TargetOpcode::G_FSHR:
6734 case TargetOpcode::G_FSHL: {
6735 Observer.changingInstr(MI);
6736 moreElementsVectorSrc(MI, MoreTy, 1);
6737 moreElementsVectorSrc(MI, MoreTy, 2);
6738 moreElementsVectorSrc(MI, MoreTy, 3);
6739 moreElementsVectorDst(MI, MoreTy, 0);
6740 Observer.changedInstr(MI);
6741 return Legalized;
6742 }
6743 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6744 case TargetOpcode::G_EXTRACT:
6745 if (TypeIdx != 1)
6746 return UnableToLegalize;
6747 Observer.changingInstr(MI);
6748 moreElementsVectorSrc(MI, MoreTy, 1);
6749 Observer.changedInstr(MI);
6750 return Legalized;
// Unary ops (plus insert variants): widen source 1 and the destination.
6751 case TargetOpcode::G_INSERT:
6752 case TargetOpcode::G_INSERT_VECTOR_ELT:
6753 case TargetOpcode::G_FREEZE:
6754 case TargetOpcode::G_FNEG:
6755 case TargetOpcode::G_FABS:
6756 case TargetOpcode::G_FSQRT:
6757 case TargetOpcode::G_FCEIL:
6758 case TargetOpcode::G_FFLOOR:
6759 case TargetOpcode::G_FNEARBYINT:
6760 case TargetOpcode::G_FRINT:
6761 case TargetOpcode::G_INTRINSIC_ROUND:
6762 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6763 case TargetOpcode::G_INTRINSIC_TRUNC:
6764 case TargetOpcode::G_BITREVERSE:
6765 case TargetOpcode::G_BSWAP:
6766 case TargetOpcode::G_FCANONICALIZE:
6767 case TargetOpcode::G_SEXT_INREG:
6768 case TargetOpcode::G_ABS:
6769 case TargetOpcode::G_CTLZ:
6770 case TargetOpcode::G_CTPOP:
6771 if (TypeIdx != 0)
6772 return UnableToLegalize;
6773 Observer.changingInstr(MI);
6774 moreElementsVectorSrc(MI, MoreTy, 1);
6775 moreElementsVectorDst(MI, MoreTy, 0);
6776 Observer.changedInstr(MI);
6777 return Legalized;
6778 case TargetOpcode::G_SELECT: {
6779 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6780 if (TypeIdx == 1) {
6781 if (!CondTy.isScalar() ||
6782 DstTy.getElementCount() != MoreTy.getElementCount())
6783 return UnableToLegalize;
6784
6785 // This is turning a scalar select of vectors into a vector
6786 // select. Broadcast the select condition.
6787 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6788 Observer.changingInstr(MI);
6789 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6790 Observer.changedInstr(MI);
6791 return Legalized;
6792 }
6793
6794 if (CondTy.isVector())
6795 return UnableToLegalize;
6796
6797 Observer.changingInstr(MI);
6798 moreElementsVectorSrc(MI, MoreTy, 2);
6799 moreElementsVectorSrc(MI, MoreTy, 3);
6800 moreElementsVectorDst(MI, MoreTy, 0);
6801 Observer.changedInstr(MI);
6802 return Legalized;
6803 }
6804 case TargetOpcode::G_UNMERGE_VALUES:
6805 return UnableToLegalize;
6806 case TargetOpcode::G_PHI:
6807 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6808 case TargetOpcode::G_SHUFFLE_VECTOR:
6809 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6810 case TargetOpcode::G_BUILD_VECTOR: {
// Collect the existing element registers, pad with undef scalars up to the
// widened element count, rebuild at MoreTy, then trim back to the original
// destination width.
6812 for (auto Op : MI.uses()) {
6813 Elts.push_back(Op.getReg());
6814 }
6815
6816 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6817 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6818 }
6819
6820 MIRBuilder.buildDeleteTrailingVectorElements(
6821 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6822 MI.eraseFromParent();
6823 return Legalized;
6824 }
// Conversions: source and destination element types differ, so each side
// keeps its own element type while the element count is widened.
6825 case TargetOpcode::G_SEXT:
6826 case TargetOpcode::G_ZEXT:
6827 case TargetOpcode::G_ANYEXT:
6828 case TargetOpcode::G_TRUNC:
6829 case TargetOpcode::G_FPTRUNC:
6830 case TargetOpcode::G_FPEXT:
6831 case TargetOpcode::G_FPTOSI:
6832 case TargetOpcode::G_FPTOUI:
6833 case TargetOpcode::G_FPTOSI_SAT:
6834 case TargetOpcode::G_FPTOUI_SAT:
6835 case TargetOpcode::G_SITOFP:
6836 case TargetOpcode::G_UITOFP: {
6837 Observer.changingInstr(MI);
6838 LLT SrcExtTy;
6839 LLT DstExtTy;
6840 if (TypeIdx == 0) {
6841 DstExtTy = MoreTy;
6842 SrcExtTy = MoreTy.changeElementType(
6843 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6844 } else {
6845 DstExtTy = MoreTy.changeElementType(
6846 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6847 SrcExtTy = MoreTy;
6848 }
6849 moreElementsVectorSrc(MI, SrcExtTy, 1);
6850 moreElementsVectorDst(MI, DstExtTy, 0);
6851 Observer.changedInstr(MI);
6852 return Legalized;
6853 }
6854 case TargetOpcode::G_ICMP:
6855 case TargetOpcode::G_FCMP: {
6856 if (TypeIdx != 1)
6857 return UnableToLegalize;
6858
6859 Observer.changingInstr(MI);
6860 moreElementsVectorSrc(MI, MoreTy, 2);
6861 moreElementsVectorSrc(MI, MoreTy, 3);
// The result vector keeps its own (boolean-like) element type.
6862 LLT CondTy = MoreTy.changeVectorElementType(
6863 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6864 moreElementsVectorDst(MI, CondTy, 0);
6865 Observer.changedInstr(MI);
6866 return Legalized;
6867 }
6868 case TargetOpcode::G_BITCAST: {
6869 if (TypeIdx != 0)
6870 return UnableToLegalize;
6871
6872 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6873 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6874
// Scale the source element count so total bit width still matches the
// widened destination; bail out if it doesn't divide evenly.
6875 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6876 if (coefficient % DstTy.getNumElements() != 0)
6877 return UnableToLegalize;
6878
6879 coefficient = coefficient / DstTy.getNumElements();
6880
6881 LLT NewTy = SrcTy.changeElementCount(
6882 ElementCount::get(coefficient, MoreTy.isScalable()));
6883 Observer.changingInstr(MI);
6884 moreElementsVectorSrc(MI, NewTy, 1);
6885 moreElementsVectorDst(MI, MoreTy, 0);
6886 Observer.changedInstr(MI);
6887 return Legalized;
6888 }
6889 case TargetOpcode::G_VECREDUCE_FADD:
6890 case TargetOpcode::G_VECREDUCE_FMUL:
6891 case TargetOpcode::G_VECREDUCE_ADD:
6892 case TargetOpcode::G_VECREDUCE_MUL:
6893 case TargetOpcode::G_VECREDUCE_AND:
6894 case TargetOpcode::G_VECREDUCE_OR:
6895 case TargetOpcode::G_VECREDUCE_XOR:
6896 case TargetOpcode::G_VECREDUCE_SMAX:
6897 case TargetOpcode::G_VECREDUCE_SMIN:
6898 case TargetOpcode::G_VECREDUCE_UMAX:
6899 case TargetOpcode::G_VECREDUCE_UMIN: {
6900 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6901 MachineOperand &MO = MI.getOperand(1);
6902 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6903 auto NeutralElement = getNeutralElementForVecReduce(
6904 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6905
// Overwrite the undef padding lanes with the reduction's neutral element so
// the widened reduction produces the same value as the original.
6906 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6907 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6908 i != e; i++) {
6909 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6910 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6911 NeutralElement, Idx);
6912 }
6913
6914 Observer.changingInstr(MI);
6915 MO.setReg(NewVec.getReg(0));
6916 Observer.changedInstr(MI);
6917 return Legalized;
6918 }
6919
6920 default:
6921 return UnableToLegalize;
6922 }
6923}
6924
// Rewrite a G_SHUFFLE_VECTOR whose mask length differs from its source vector
// length so both match. If the mask is shorter, the mask is extended with
// undef (-1) lanes. If the mask is longer, the sources are padded (by
// concatenating undef vectors) up to a multiple of the source length, the
// mask indices into the second source are rebased, and — when padding
// overshot — the needed elements are extracted back out individually.
// NOTE(review): the signature lines preceding this body and line 6994
// (presumably "return Legalized;" before the closing brace) are missing from
// this extraction; the body is kept byte-identical.
6927 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6928 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6929 unsigned MaskNumElts = Mask.size();
6930 unsigned SrcNumElts = SrcTy.getNumElements();
6931 LLT DestEltTy = DstTy.getElementType();
6932
// Already consistent: nothing to do.
6933 if (MaskNumElts == SrcNumElts)
6934 return Legalized;
6935
6936 if (MaskNumElts < SrcNumElts) {
6937 // Extend mask to match new destination vector size with
6938 // undef values.
6939 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6940 llvm::copy(Mask, NewMask.begin());
6941
6942 moreElementsVectorDst(MI, SrcTy, 0);
6943 MIRBuilder.setInstrAndDebugLoc(MI);
6944 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6945 MI.getOperand(1).getReg(),
6946 MI.getOperand(2).getReg(), NewMask);
6947 MI.eraseFromParent();
6948
6949 return Legalized;
6950 }
6951
// Mask is longer than the sources: pad the sources up to the next multiple
// of SrcNumElts.
6952 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6953 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6954 LLT PaddedTy =
6955 DstTy.changeVectorElementCount(ElementCount::getFixed(PaddedMaskNumElts));
6956
6957 // Create new source vectors by concatenating the initial
6958 // source vectors with undefined vectors of the same size.
6959 auto Undef = MIRBuilder.buildUndef(SrcTy);
6960 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6961 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6962 MOps1[0] = MI.getOperand(1).getReg();
6963 MOps2[0] = MI.getOperand(2).getReg();
6964
6965 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6966 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6967
6968 // Readjust mask for new input vector length.
6969 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6970 for (unsigned I = 0; I != MaskNumElts; ++I) {
6971 int Idx = Mask[I];
// Indices >= SrcNumElts referred to the second source; shift them past the
// first source's new padded length.
6972 if (Idx >= static_cast<int>(SrcNumElts))
6973 Idx += PaddedMaskNumElts - SrcNumElts;
6974 MappedOps[I] = Idx;
6975 }
6976
6977 // If we got more elements than required, extract subvector.
6978 if (MaskNumElts != PaddedMaskNumElts) {
6979 auto Shuffle =
6980 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6981
6982 SmallVector<Register, 16> Elts(MaskNumElts);
6983 for (unsigned I = 0; I < MaskNumElts; ++I) {
6984 Elts[I] =
6985 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6986 .getReg(0);
6987 }
6988 MIRBuilder.buildBuildVector(DstReg, Elts);
6989 } else {
6990 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6991 }
6992
6993 MI.eraseFromParent();
6995}
6996
// Widen a (canonicalized) G_SHUFFLE_VECTOR to MoreTy: both sources are
// widened, the mask is extended with undef (-1) lanes, and indices that
// referred to the second source are rebased from NumElts to WidenNumElts.
// Only supports TypeIdx 0 and shuffles where dst and both source types match.
// NOTE(review): the signature line(s) preceding "unsigned int TypeIdx" and
// line 7007 (the statement inside the mismatched-length `if` — presumably a
// call to the length-equalization helper above) are missing from this
// extraction; the body is kept byte-identical.
6999 unsigned int TypeIdx, LLT MoreTy) {
7000 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
7001 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7002 unsigned NumElts = DstTy.getNumElements();
7003 unsigned WidenNumElts = MoreTy.getNumElements();
7004
7005 if (DstTy.isVector() && Src1Ty.isVector() &&
7006 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7008 }
7009
7010 if (TypeIdx != 0)
7011 return UnableToLegalize;
7012
7013 // Expect a canonicalized shuffle.
7014 if (DstTy != Src1Ty || DstTy != Src2Ty)
7015 return UnableToLegalize;
7016
7017 moreElementsVectorSrc(MI, MoreTy, 1);
7018 moreElementsVectorSrc(MI, MoreTy, 2);
7019
7020 // Adjust mask based on new input vector length.
7021 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7022 for (unsigned I = 0; I != NumElts; ++I) {
7023 int Idx = Mask[I];
7024 if (Idx < static_cast<int>(NumElts))
7025 NewMask[I] = Idx;
7026 else
// Second-source index: rebase past the first source's widened length.
7027 NewMask[I] = Idx - NumElts + WidenNumElts;
7028 }
7029 moreElementsVectorDst(MI, MoreTy, 0);
7030 MIRBuilder.setInstrAndDebugLoc(MI);
7031 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7032 MI.getOperand(1).getReg(),
7033 MI.getOperand(2).getReg(), NewMask);
7034 MI.eraseFromParent();
7035 return Legalized;
7036}
7037
// Schoolbook multi-part multiplication: multiply Src1Regs x Src2Regs (each a
// little-endian array of NarrowTy parts) and write DstParts result parts into
// DstRegs. For each output index it sums the low halves of the contributing
// G_MUL products, the high halves (G_UMULH) of the previous index's products,
// and the carry accumulated at the previous index; carries are tracked with
// G_UADDO + zext except for the topmost part, where they can be discarded.
// NOTE(review): lines 7042, 7052 and 7058 are missing from this extraction —
// they presumably declare the builder alias `B`, the `Factors` vector, and
// the `MachineInstrBuilder Mul =` opener; the visible code is byte-identical.
7038void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7039 ArrayRef<Register> Src1Regs,
7040 ArrayRef<Register> Src2Regs,
7041 LLT NarrowTy) {
7043 unsigned SrcParts = Src1Regs.size();
7044 unsigned DstParts = DstRegs.size();
7045
7046 unsigned DstIdx = 0; // Low bits of the result.
7047 Register FactorSum =
7048 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7049 DstRegs[DstIdx] = FactorSum;
7050
7051 Register CarrySumPrevDstIdx;
7053
7054 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7055 // Collect low parts of muls for DstIdx.
// Loop bounds keep both i and DstIdx - i inside [0, SrcParts).
7056 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7057 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7059 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7060 Factors.push_back(Mul.getReg(0));
7061 }
7062 // Collect high parts of muls from previous DstIdx.
7063 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7064 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7065 MachineInstrBuilder Umulh =
7066 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7067 Factors.push_back(Umulh.getReg(0));
7068 }
7069 // Add CarrySum from additions calculated for previous DstIdx.
7070 if (DstIdx != 1) {
7071 Factors.push_back(CarrySumPrevDstIdx);
7072 }
7073
7074 Register CarrySum;
7075 // Add all factors and accumulate all carries into CarrySum.
7076 if (DstIdx != DstParts - 1) {
7077 MachineInstrBuilder Uaddo =
7078 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7079 FactorSum = Uaddo.getReg(0);
7080 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7081 for (unsigned i = 2; i < Factors.size(); ++i) {
7082 MachineInstrBuilder Uaddo =
7083 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7084 FactorSum = Uaddo.getReg(0);
7085 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7086 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7087 }
7088 } else {
7089 // Since value for the next index is not calculated, neither is CarrySum.
7090 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7091 for (unsigned i = 2; i < Factors.size(); ++i)
7092 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7093 }
7094
7095 CarrySumPrevDstIdx = CarrySum;
7096 DstRegs[DstIdx] = FactorSum;
7097 Factors.clear();
7098 }
7099}
7100
// Narrow a wide add/sub family instruction (G_ADD/G_SUB and their
// overflow/carry variants) into a ripple chain of NarrowTy-sized pieces:
// the lowest piece uses the overflow opcode (OpO), middle pieces the
// carry-in/carry-out opcode (OpE), and the final piece OpF (the signed
// variant when the original op reports signed overflow). Vector types are
// not supported.
// NOTE(review): the signature line(s) preceding "LLT NarrowTy) {" are
// missing from this extraction; the body is kept byte-identical.
7103 LLT NarrowTy) {
7104 if (TypeIdx != 0)
7105 return UnableToLegalize;
7106
7107 Register DstReg = MI.getOperand(0).getReg();
7108 LLT DstType = MRI.getType(DstReg);
7109 // FIXME: add support for vector types
7110 if (DstType.isVector())
7111 return UnableToLegalize;
7112
// Select the unsigned chain opcodes; only the topmost piece (OpF) needs the
// signed variant, because signed overflow is decided by the final piece.
7113 unsigned Opcode = MI.getOpcode();
7114 unsigned OpO, OpE, OpF;
7115 switch (Opcode) {
7116 case TargetOpcode::G_SADDO:
7117 case TargetOpcode::G_SADDE:
7118 case TargetOpcode::G_UADDO:
7119 case TargetOpcode::G_UADDE:
7120 case TargetOpcode::G_ADD:
7121 OpO = TargetOpcode::G_UADDO;
7122 OpE = TargetOpcode::G_UADDE;
7123 OpF = TargetOpcode::G_UADDE;
7124 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7125 OpF = TargetOpcode::G_SADDE;
7126 break;
7127 case TargetOpcode::G_SSUBO:
7128 case TargetOpcode::G_SSUBE:
7129 case TargetOpcode::G_USUBO:
7130 case TargetOpcode::G_USUBE:
7131 case TargetOpcode::G_SUB:
7132 OpO = TargetOpcode::G_USUBO;
7133 OpE = TargetOpcode::G_USUBE;
7134 OpF = TargetOpcode::G_USUBE;
7135 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7136 OpF = TargetOpcode::G_SSUBE;
7137 break;
7138 default:
7139 llvm_unreachable("Unexpected add/sub opcode!");
7140 }
7141
7142 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7143 unsigned NumDefs = MI.getNumExplicitDefs();
7144 Register Src1 = MI.getOperand(NumDefs).getReg();
7145 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7146 Register CarryDst, CarryIn;
7147 if (NumDefs == 2)
7148 CarryDst = MI.getOperand(1).getReg();
7149 if (MI.getNumOperands() == NumDefs + 3)
7150 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7151
// Break both sources into NarrowTy parts plus an optional leftover piece
// when NarrowTy does not evenly divide the register.
7152 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7153 LLT LeftoverTy, DummyTy;
7154 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7155 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7156 MIRBuilder, MRI);
7157 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7158 MRI);
7159
7160 int NarrowParts = Src1Regs.size();
7161 Src1Regs.append(Src1Left);
7162 Src2Regs.append(Src2Left);
7163 DstRegs.reserve(Src1Regs.size());
7164
7165 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7166 Register DstReg =
7167 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7168 Register CarryOut;
7169 // Forward the final carry-out to the destination register
7170 if (i == e - 1 && CarryDst)
7171 CarryOut = CarryDst;
7172 else
7173 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7174
// CarryIn is null only for the first piece of a plain (no carry-in) op; it
// is set at the end of each iteration, so later pieces always chain.
7175 if (!CarryIn) {
7176 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7177 {Src1Regs[i], Src2Regs[i]});
7178 } else if (i == e - 1) {
7179 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7180 {Src1Regs[i], Src2Regs[i], CarryIn});
7181 } else {
7182 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7183 {Src1Regs[i], Src2Regs[i], CarryIn});
7184 }
7185
7186 DstRegs.push_back(DstReg);
7187 CarryIn = CarryOut;
7188 }
7189 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7190 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7191 ArrayRef(DstRegs).drop_front(NarrowParts));
7192
7193 MI.eraseFromParent();
7194 return Legalized;
7195}
7196
// Narrow a wide scalar multiply (G_MUL / G_UMULH) into NarrowTy parts.
// Computes a double-width schoolbook product via multiplyRegisters, then
// keeps the low NumParts pieces for G_MUL or the high NumParts pieces for
// G_UMULH. Requires NarrowTy to evenly divide the type; vectors unsupported.
// NOTE(review): the function signature line(s) are missing from this
// extraction; the body is kept byte-identical.
7199 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7200
7201 LLT Ty = MRI.getType(DstReg);
7202 if (Ty.isVector())
7203 return UnableToLegalize;
7204
7205 unsigned Size = Ty.getSizeInBits();
7206 unsigned NarrowSize = NarrowTy.getSizeInBits();
7207 if (Size % NarrowSize != 0)
7208 return UnableToLegalize;
7209
7210 unsigned NumParts = Size / NarrowSize;
7211 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
// G_UMULH needs the full 2*NumParts product so the top half is available.
7212 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7213
7214 SmallVector<Register, 2> Src1Parts, Src2Parts;
7215 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7216 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7217 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7218 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7219
7220 // Take only high half of registers if this is high mul.
7221 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7222 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7223 MI.eraseFromParent();
7224 return Legalized;
7225}
7226
// Narrow the integer result of G_FPTOSI / G_FPTOUI. Only handles the f16
// source case: every finite half-precision value fits in 16 bits (17 bits
// signed, to cover the sign), so the conversion can simply produce the
// narrow integer and then sign-/zero-extend it to the original width.
// NOTE(review): the function signature line(s) are missing from this
// extraction; the body is kept byte-identical.
7229 LLT NarrowTy) {
7230 if (TypeIdx != 0)
7231 return UnableToLegalize;
7232
7233 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7234
7235 Register Src = MI.getOperand(1).getReg();
7236 LLT SrcTy = MRI.getType(Src);
7237
7238 // If all finite floats fit into the narrowed integer type, we can just swap
7239 // out the result type. This is practically only useful for conversions from
7240 // half to at least 16-bits, so just handle the one case.
7241 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7242 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7243 return UnableToLegalize;
7244
7245 Observer.changingInstr(MI);
// Narrow the def and re-extend to the original width with the extension
// matching the conversion's signedness.
7246 narrowScalarDst(MI, NarrowTy, 0,
7247 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7248 Observer.changedInstr(MI);
7249 return Legalized;
7250}
7251
// Narrow the source (TypeIdx 1) of a G_EXTRACT: split the source into
// NarrowTy parts, keep only the parts overlapping the extracted bit range
// [OpStart, OpStart+OpSize), performing a sub-extract where a part is only
// partially covered, then merge the collected segments into the destination.
// Requires the source size to be an exact multiple of NarrowSize.
// NOTE(review): the function signature line(s) are missing from this
// extraction; the body is kept byte-identical.
7254 LLT NarrowTy) {
7255 if (TypeIdx != 1)
7256 return UnableToLegalize;
7257
7258 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7259
7260 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7261 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7262 // NarrowSize.
7263 if (SizeOp1 % NarrowSize != 0)
7264 return UnableToLegalize;
7265 int NumParts = SizeOp1 / NarrowSize;
7266
7267 SmallVector<Register, 2> SrcRegs, DstRegs;
7268 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7269 MIRBuilder, MRI);
7270
7271 Register OpReg = MI.getOperand(0).getReg();
7272 uint64_t OpStart = MI.getOperand(2).getImm();
7273 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7274 for (int i = 0; i < NumParts; ++i) {
7275 unsigned SrcStart = i * NarrowSize;
7276
7277 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7278 // No part of the extract uses this subregister, ignore it.
7279 continue;
7280 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7281 // The entire subregister is extracted, forward the value.
7282 DstRegs.push_back(SrcRegs[i]);
7283 continue;
7284 }
7285
7286 // OpSegStart is where this destination segment would start in OpReg if it
7287 // extended infinitely in both directions.
7288 int64_t ExtractOffset;
7289 uint64_t SegSize;
7290 if (OpStart < SrcStart) {
7291 ExtractOffset = 0;
7292 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7293 } else {
7294 ExtractOffset = OpStart - SrcStart;
7295 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7296 }
7297
7298 Register SegReg = SrcRegs[i];
7299 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7300 // A genuine extract is needed.
7301 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7302 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7303 }
7304
7305 DstRegs.push_back(SegReg);
7306 }
7307
// Reassemble: build-vector for vector dsts, merge for multiple scalar
// segments, plain copy when a single segment covers the whole extract.
7308 Register DstReg = MI.getOperand(0).getReg();
7309 if (MRI.getType(DstReg).isVector())
7310 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7311 else if (DstRegs.size() > 1)
7312 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7313 else
7314 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7315 MI.eraseFromParent();
7316 return Legalized;
7317}
7318
// Narrow the destination (TypeIdx 0) of a G_INSERT: split the big source
// into NarrowTy parts (plus a leftover piece), and for each part either
// forward it untouched, replace it wholesale with the inserted value, or
// splice in the overlapping segment of the inserted value via
// extract+insert. The rebuilt parts are merged (with a trunc if the
// leftover piece was widened past the original size).
// NOTE(review): the function signature line(s) are missing from this
// extraction; the body is kept byte-identical.
7321 LLT NarrowTy) {
7322 // FIXME: Don't know how to handle secondary types yet.
7323 if (TypeIdx != 0)
7324 return UnableToLegalize;
7325
7326 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7327 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7328 LLT LeftoverTy;
7329 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7330 LeftoverRegs, MIRBuilder, MRI);
7331
7332 SrcRegs.append(LeftoverRegs);
7333
7334 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7335 Register OpReg = MI.getOperand(2).getReg();
7336 uint64_t OpStart = MI.getOperand(3).getImm();
7337 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7338 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7339 unsigned DstStart = I * NarrowSize;
7340
7341 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7342 // The entire subregister is defined by this insert, forward the new
7343 // value.
7344 DstRegs.push_back(OpReg);
7345 continue;
7346 }
7347
7348 Register SrcReg = SrcRegs[I];
7349 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7350 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7351 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7352 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7353 }
7354
7355 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7356 // No part of the insert affects this subregister, forward the original.
7357 DstRegs.push_back(SrcReg);
7358 continue;
7359 }
7360
7361 // OpSegStart is where this destination segment would start in OpReg if it
7362 // extended infinitely in both directions.
7363 int64_t ExtractOffset, InsertOffset;
7364 uint64_t SegSize;
7365 if (OpStart < DstStart) {
7366 InsertOffset = 0;
7367 ExtractOffset = DstStart - OpStart;
7368 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7369 } else {
7370 InsertOffset = OpStart - DstStart;
7371 ExtractOffset = 0;
7372 SegSize =
7373 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7374 }
7375
7376 Register SegReg = OpReg;
7377 if (ExtractOffset != 0 || SegSize != OpSize) {
7378 // A genuine extract is needed.
7379 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7380 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7381 }
7382
7383 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7384 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7385 DstRegs.push_back(DstReg);
7386 }
7387
// If the any-extended leftover piece pushed the total width past RegTy,
// merge at the wide width and truncate back down.
7388 uint64_t WideSize = DstRegs.size() * NarrowSize;
7389 Register DstReg = MI.getOperand(0).getReg();
7390 if (WideSize > RegTy.getSizeInBits()) {
7391 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7392 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7393 MIRBuilder.buildTrunc(DstReg, MergeReg);
7394 } else
7395 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7396
7397 MI.eraseFromParent();
7398 return Legalized;
7399}
7400
7403 LLT NarrowTy) {
// Narrow a simple two-source, one-destination operation by splitting all
// three registers into NarrowTy pieces (plus leftovers), emitting the same
// opcode piecewise, and re-merging the results into the destination.
// The signature line is missing from this view; TypeIdx/NarrowTy are the
// usual narrowing parameters.
7404 Register DstReg = MI.getOperand(0).getReg();
7405 LLT DstTy = MRI.getType(DstReg);
7406
// Only handles exactly dst + two sources; asserts rather than bailing.
7407 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7408
7409 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7410 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7411 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7412 LLT LeftoverTy;
7413 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7414 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7415 return UnableToLegalize;
7416
// Both sources have type DstTy, so the second split must succeed if the
// first did; a mismatch indicates an internal inconsistency.
7417 LLT Unused;
7418 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7419 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7420 llvm_unreachable("inconsistent extractParts result");
7421
7422 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7423 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7424 {Src0Regs[I], Src1Regs[I]});
7425 DstRegs.push_back(Inst.getReg(0));
7426 }
7427
7428 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7429 auto Inst = MIRBuilder.buildInstr(
7430 MI.getOpcode(),
7431 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7432 DstLeftoverRegs.push_back(Inst.getReg(0));
7433 }
7434
7435 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7436 LeftoverTy, DstLeftoverRegs);
7437
7438 MI.eraseFromParent();
7439 return Legalized;
7440}
7441
7444 LLT NarrowTy) {
// Narrow an extension-like instruction on its destination (TypeIdx 0):
// split the source into GCD-sized pieces, rebuild an LCM-sized value using
// the original extend opcode, then remerge into the destination.
7445 if (TypeIdx != 0)
7446 return UnableToLegalize;
7447
7448 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7449
7450 LLT DstTy = MRI.getType(DstReg);
7451 if (DstTy.isVector())
7452 return UnableToLegalize;
7453
// NOTE(review): the declaration of Parts (a SmallVector<Register, N>) was
// dropped by the extraction between these lines — confirm against source.
7455 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7456 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7457 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7458
7459 MI.eraseFromParent();
7460 return Legalized;
7461}
7462
7465 LLT NarrowTy) {
// Narrow a select by splitting both value operands into NarrowTy pieces
// (plus leftovers) and emitting one select per piece, all sharing the
// original scalar condition.
7466 if (TypeIdx != 0)
7467 return UnableToLegalize;
7468
7469 Register CondReg = MI.getOperand(1).getReg();
7470 LLT CondTy = MRI.getType(CondReg);
7471 if (CondTy.isVector()) // TODO: Handle vselect
7472 return UnableToLegalize;
7473
7474 Register DstReg = MI.getOperand(0).getReg();
7475 LLT DstTy = MRI.getType(DstReg);
7476
7477 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7478 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7479 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7480 LLT LeftoverTy;
7481 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7482 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7483 return UnableToLegalize;
7484
// The second value operand has the same type, so this split cannot fail
// if the first succeeded.
7485 LLT Unused;
7486 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7487 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7488 llvm_unreachable("inconsistent extractParts result");
7489
7490 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7491 auto Select = MIRBuilder.buildSelect(NarrowTy,
7492 CondReg, Src1Regs[I], Src2Regs[I]);
7493 DstRegs.push_back(Select.getReg(0));
7494 }
7495
7496 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7497 auto Select = MIRBuilder.buildSelect(
7498 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7499 DstLeftoverRegs.push_back(Select.getReg(0));
7500 }
7501
7502 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7503 LeftoverTy, DstLeftoverRegs);
7504
7505 MI.eraseFromParent();
7506 return Legalized;
7507}
7508
7511 LLT NarrowTy) {
// Narrow a count-leading-zeros whose source (TypeIdx 1) is exactly twice
// NarrowTy: unmerge into Lo/Hi halves and combine their half-width counts.
7512 if (TypeIdx != 1)
7513 return UnableToLegalize;
7514
7515 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7516 unsigned NarrowSize = NarrowTy.getSizeInBits();
7517
7518 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7519 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7520
// NOTE(review): a dropped line here presumably declared the local builder
// alias B (e.g. MachineIRBuilder &B = MIRBuilder;) — confirm.
7522 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7523 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7524 auto C_0 = B.buildConstant(NarrowTy, 0);
7525 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7526 UnmergeSrc.getReg(1), C_0);
// If the whole source is zero, the Lo count is also undefined in the
// ZERO_UNDEF case, so it is safe to use the undef variant on Lo.
7527 auto LoCTLZ = IsUndef ?
7528 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7529 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7530 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7531 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
// Hi is only used when it is known non-zero, so ZERO_UNDEF is always safe.
7532 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7533 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7534
7535 MI.eraseFromParent();
7536 return Legalized;
7537 }
7538
7539 return UnableToLegalize;
7540}
7541
7544 LLT NarrowTy) {
// Narrow a count-trailing-zeros whose source (TypeIdx 1) is exactly twice
// NarrowTy: mirror image of the CTLZ narrowing, keyed on the Lo half.
7545 if (TypeIdx != 1)
7546 return UnableToLegalize;
7547
7548 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7549 unsigned NarrowSize = NarrowTy.getSizeInBits();
7550
7551 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7552 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7553
// NOTE(review): a dropped line here presumably declared the local builder
// alias B (e.g. MachineIRBuilder &B = MIRBuilder;) — confirm.
7555 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7556 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7557 auto C_0 = B.buildConstant(NarrowTy, 0);
7558 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7559 UnmergeSrc.getReg(0), C_0);
// If the whole source is zero the result is undefined anyway in the
// ZERO_UNDEF case, so Hi may use the undef variant.
7560 auto HiCTTZ = IsUndef ?
7561 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7562 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7563 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7564 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
// Lo is only used when it is known non-zero, so ZERO_UNDEF is always safe.
7565 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7566 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7567
7568 MI.eraseFromParent();
7569 return Legalized;
7570 }
7571
7572 return UnableToLegalize;
7573}
7574
7577 LLT NarrowTy) {
// Narrow a count-leading-sign-bits (CTLS) whose source (TypeIdx 1) is
// exactly twice NarrowTy, by counting sign bits of the Hi half and, when
// Hi is entirely sign bits, continuing into the (sign-adjusted) Lo half.
7578 if (TypeIdx != 1)
7579 return UnableToLegalize;
7580
7581 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7582 unsigned NarrowSize = NarrowTy.getSizeInBits();
7583
7584 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7585 return UnableToLegalize;
7586
// NOTE(review): a dropped line here presumably declared the local builder
// alias B (e.g. MachineIRBuilder &B = MIRBuilder;) — confirm.
7588
7589 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7590 Register Lo = UnmergeSrc.getReg(0);
7591 Register Hi = UnmergeSrc.getReg(1);
7592
// Sign is Hi's sign bit broadcast across NarrowTy (all-ones or all-zeros).
7593 auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1);
7594 auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt);
7595
// Hi consists entirely of sign bits iff Hi equals the broadcast sign.
7596 auto HiIsSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign);
7597
7598 // Invert Lo if Hi is negative. Then count the leading zeros. If there are no
7599 // leading zeros, then the MSB of Lo is different than the MSB of Hi.
7600 // Otherwise the leading zeros represent additional sign bits of the original
7601 // value.
// NOTE(review): these ops use DstTy as the result type while Lo/Sign are
// NarrowTy values — presumably DstTy == NarrowTy on this path; confirm.
7602 auto LoInv = B.buildXor(DstTy, Lo, Sign);
7603 auto LoCTLZ = B.buildCTLZ(DstTy, LoInv);
7604
7605 // Add NarrowSize-1 to LoCTLZ. This is the full CTLS if Hi is all sign bits.
7606 auto C_NarrowSizeM1 = B.buildConstant(DstTy, NarrowSize - 1);
7607 auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7608
7609 auto HiCTLS = B.buildCTLS(DstTy, Hi);
7610
7611 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7612
7613 MI.eraseFromParent();
7614 return Legalized;
7615}
7616
7619 LLT NarrowTy) {
// Narrow a population count whose source (TypeIdx 1) is exactly twice
// NarrowTy: popcount each half independently and add the results.
7620 if (TypeIdx != 1)
7621 return UnableToLegalize;
7622
7623 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7624 unsigned NarrowSize = NarrowTy.getSizeInBits();
7625
7626 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7627 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7628
7629 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7630 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7631 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7632
7633 MI.eraseFromParent();
7634 return Legalized;
7635 }
7636
7637 return UnableToLegalize;
7638}
7639
7642 LLT NarrowTy) {
// Narrow the exponent operand (operand 2, TypeIdx 1) of a ldexp-style
// instruction: clamp the exponent into NarrowTy's signed range, truncate
// it, and update the instruction in place (no new instruction emitted).
7643 if (TypeIdx != 1)
7644 return UnableToLegalize;
7645
// NOTE(review): a dropped line here presumably declared the local builder
// alias B (e.g. MachineIRBuilder &B = MIRBuilder;) — confirm.
7647 Register ExpReg = MI.getOperand(2).getReg();
7648 LLT ExpTy = MRI.getType(ExpReg);
7649
7650 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7651
7652 // Clamp the exponent to the range of the target type.
// Saturating to [minIntN, maxIntN] is behavior-preserving: out-of-range
// exponents already produce the extreme results (overflow/underflow).
7653 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7654 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7655 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7656 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7657
7658 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
// Mutate the existing instruction under observer notifications.
7659 Observer.changingInstr(MI);
7660 MI.getOperand(2).setReg(Trunc.getReg(0));
7661 Observer.changedInstr(MI);
7662 return Legalized;
7663}
7664
// Lower bit-counting operations (CTLZ/CTTZ and their ZERO_UNDEF variants,
// CTPOP, CTLS) into sequences of simpler operations. The function signature
// line is missing from this view; the body dispatches on MI's opcode.
7667 unsigned Opc = MI.getOpcode();
7668 const auto &TII = MIRBuilder.getTII();
// An op is a usable fallback if the target can handle it at all
// (legal, libcall, or custom).
7669 auto isSupported = [this](const LegalityQuery &Q) {
7670 auto QAction = LI.getAction(Q).Action;
7671 return QAction == Legal || QAction == Libcall || QAction == Custom;
7672 };
7673 switch (Opc) {
7674 default:
7675 return UnableToLegalize;
7676 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7677 // This trivially expands to CTLZ.
7678 Observer.changingInstr(MI);
7679 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7680 Observer.changedInstr(MI);
7681 return Legalized;
7682 }
7683 case TargetOpcode::G_CTLZ: {
7684 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7685 unsigned Len = SrcTy.getScalarSizeInBits();
7686
7687 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7688 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7689 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7690 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7691 auto ICmp = MIRBuilder.buildICmp(
7692 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7693 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7694 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7695 MI.eraseFromParent();
7696 return Legalized;
7697 }
7698 // for now, we do this:
7699 // NewLen = NextPowerOf2(Len);
7700 // x = x | (x >> 1);
7701 // x = x | (x >> 2);
7702 // ...
7703 // x = x | (x >>16);
7704 // x = x | (x >>32); // for 64-bit input
7705 // Upto NewLen/2
7706 // return Len - popcount(x);
7707 //
7708 // Ref: "Hacker's Delight" by Henry Warren
7709 Register Op = SrcReg;
7710 unsigned NewLen = PowerOf2Ceil(Len);
// Smear the highest set bit into every lower position, then count the
// remaining zero bits via popcount.
7711 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7712 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7713 auto MIBOp = MIRBuilder.buildOr(
7714 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7715 Op = MIBOp.getReg(0);
7716 }
7717 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7718 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7719 MIBPop);
7720 MI.eraseFromParent();
7721 return Legalized;
7722 }
7723 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7724 // This trivially expands to CTTZ.
7725 Observer.changingInstr(MI);
7726 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7727 Observer.changedInstr(MI);
7728 return Legalized;
7729 }
7730 case TargetOpcode::G_CTTZ: {
7731 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7732
7733 unsigned Len = SrcTy.getScalarSizeInBits();
7734 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7735 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7736 // zero.
7737 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7738 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
// NOTE(review): this uses DstTy.changeElementSize(1) for the compare
// result while the CTLZ path above uses SrcTy.changeElementSize(1) —
// presumably equivalent here (compare of SrcTy values); confirm.
7739 auto ICmp = MIRBuilder.buildICmp(
7740 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7741 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7742 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7743 MI.eraseFromParent();
7744 return Legalized;
7745 }
7746 // for now, we use: { return popcount(~x & (x - 1)); }
7747 // unless the target has ctlz but not ctpop, in which case we use:
7748 // { return 32 - nlz(~x & (x-1)); }
7749 // Ref: "Hacker's Delight" by Henry Warren
7750 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7751 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7752 auto MIBTmp = MIRBuilder.buildAnd(
7753 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7754 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7755 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7756 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7757 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7758 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7759 MI.eraseFromParent();
7760 return Legalized;
7761 }
// Reuse the original instruction as the CTPOP of the masked value.
7762 Observer.changingInstr(MI);
7763 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7764 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7765 Observer.changedInstr(MI);
7766 return Legalized;
7767 }
7768 case TargetOpcode::G_CTPOP: {
7769 Register SrcReg = MI.getOperand(1).getReg();
7770 LLT Ty = MRI.getType(SrcReg);
7771 unsigned Size = Ty.getScalarSizeInBits();
// NOTE(review): a dropped line here presumably declared the local builder
// alias B (e.g. MachineIRBuilder &B = MIRBuilder;) — confirm.
7773
7774 // Bail out on irregular type lengths.
7775 if (Size > 128 || Size % 8 != 0)
7776 return UnableToLegalize;
7777
7778 // Count set bits in blocks of 2 bits. Default approach would be
7779 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7780 // We use following formula instead:
7781 // B2Count = val - { (val >> 1) & 0x55555555 }
7782 // since it gives same result in blocks of 2 with one instruction less.
7783 auto C_1 = B.buildConstant(Ty, 1);
7784 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7785 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7786 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7787 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7788 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7789
7790 // In order to get count in blocks of 4 add values from adjacent block of 2.
7791 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7792 auto C_2 = B.buildConstant(Ty, 2);
7793 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7794 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7795 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7796 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7797 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7798 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7799
7800 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7801 // addition since count value sits in range {0,...,8} and 4 bits are enough
7802 // to hold such binary values. After addition high 4 bits still hold count
7803 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7804 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7805 auto C_4 = B.buildConstant(Ty, 4);
7806 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7807 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7808 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7809 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7810 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7811
7812 assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
7813
7814 // Avoid the multiply when shift-add is cheaper.
7815 if (Size == 16 && !Ty.isVector()) {
7816 // v = (v + (v >> 8)) & 0xFF;
7817 auto C_8 = B.buildConstant(Ty, 8);
7818 auto HighSum = B.buildLShr(Ty, B8Count, C_8);
7819 auto Res = B.buildAdd(Ty, B8Count, HighSum);
7820 B.buildAnd(MI.getOperand(0).getReg(), Res, B.buildConstant(Ty, 0xFF));
7821 MI.eraseFromParent();
7822 return Legalized;
7823 }
7824
7825 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7826 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7827 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7828
7829 // Shift count result from 8 high bits to low bits.
7830 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7831
// Use the multiply-based reduction only if the target can handle G_MUL
// (possibly after widening); otherwise fall back to a shift-add chain.
7832 auto IsMulSupported = [this](const LLT Ty) {
7833 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7834 return Action == Legal || Action == WidenScalar || Action == Custom;
7835 };
7836 if (IsMulSupported(Ty)) {
7837 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7838 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7839 } else {
// Doubling shift-add accumulates all byte counts into the top byte,
// equivalent to multiplying by the 0x01 splat mask.
7840 auto ResTmp = B8Count;
7841 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7842 auto ShiftC = B.buildConstant(Ty, Shift);
7843 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7844 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7845 }
7846 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7847 }
7848 MI.eraseFromParent();
7849 return Legalized;
7850 }
7851 case TargetOpcode::G_CTLS: {
7852 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7853
7854 // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1
// XOR with the broadcast sign bit clears all redundant sign bits; the
// CTLZ of the result over-counts by one (the sign bit itself).
7855 auto SignIdxC =
7856 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7857 auto OneC = MIRBuilder.buildConstant(DstTy, 1);
7858
7859 auto Shr = MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7860
7861 auto Xor = MIRBuilder.buildXor(SrcTy, SrcReg, Shr);
7862 auto Ctlz = MIRBuilder.buildCTLZ(DstTy, Xor);
7863
7864 MIRBuilder.buildSub(DstReg, Ctlz, OneC);
7865 MI.eraseFromParent();
7866 return Legalized;
7867 }
7868 }
7869}
7870
7871// Check that (every element of) Reg is undef or not an exact multiple of BW.
// Used by the funnel-shift/rotate lowerings to prove the shift amount is
// never congruent to 0 mod the bit width, which licenses the negate-based
// rewrites below. The signature line (static function taking the MRI and
// the parameters shown) is missing from this view.
7873 Register Reg, unsigned BW) {
7874 return matchUnaryPredicate(
7875 MRI, Reg,
7876 [=](const Constant *C) {
7877 // Null constant here means an undef.
// NOTE(review): a dropped line here presumably dyn_casts C to
// ConstantInt (CI) — confirm against source.
7879 return !CI || CI->getValue().urem(BW) != 0;
7880 },
7881 /*AllowUndefs*/ true);
7882}
7883
// Lower a funnel shift by converting it into the opposite-direction funnel
// shift (which the caller has determined is preferable), either by negating
// the amount (safe only when the amount is never 0 mod BW) or by pre-shifting
// the operands by one and inverting the amount. Signature line missing from
// this view.
7886 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7887 LLT Ty = MRI.getType(Dst);
7888 LLT ShTy = MRI.getType(Z);
7889
7890 unsigned BW = Ty.getScalarSizeInBits();
7891
// The -Z / ~Z identities below rely on a power-of-two bit width.
7892 if (!isPowerOf2_32(BW))
7893 return UnableToLegalize;
7894
7895 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7896 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7897
7898 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7899 // fshl X, Y, Z -> fshr X, Y, -Z
7900 // fshr X, Y, Z -> fshl X, Y, -Z
7901 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
// NOTE(review): the Sub is built with result type Ty while Zero/Z have
// ShTy — presumably Ty == ShTy whenever this path is taken; confirm.
7902 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7903 } else {
7904 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7905 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
// Pre-shifting by one absorbs the off-by-one between Z and ~Z, so the
// rewrite is correct even when Z % BW == 0.
7906 auto One = MIRBuilder.buildConstant(ShTy, 1);
7907 if (IsFSHL) {
7908 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7909 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7910 } else {
7911 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7912 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7913 }
7914
7915 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7916 }
7917
7918 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7919 MI.eraseFromParent();
7920 return Legalized;
7921}
7922
// Lower a funnel shift into plain shifts and an OR, taking care never to
// emit a shift by the full bit width (which would be poison). Signature
// line missing from this view.
7925 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7926 LLT Ty = MRI.getType(Dst);
7927 LLT ShTy = MRI.getType(Z);
7928
7929 const unsigned BW = Ty.getScalarSizeInBits();
7930 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7931
7932 Register ShX, ShY;
7933 Register ShAmt, InvShAmt;
7934
7935 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7936 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7937 // fshl: X << C | Y >> (BW - C)
7938 // fshr: X << (BW - C) | Y >> C
7939 // where C = Z % BW is not zero
// C != 0 guarantees BW - C < BW, so both shift amounts are in range.
7940 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7941 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7942 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7943 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7944 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7945 } else {
7946 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7947 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
// The extra shift-by-one keeps the second shift amount <= BW - 1 even
// when Z % BW == 0.
7948 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7949 if (isPowerOf2_32(BW)) {
7950 // Z % BW -> Z & (BW - 1)
7951 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7952 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7953 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7954 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7955 } else {
7956 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7957 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7958 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7959 }
7960
7961 auto One = MIRBuilder.buildConstant(ShTy, 1);
7962 if (IsFSHL) {
7963 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7964 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7965 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7966 } else {
7967 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7968 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7969 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7970 }
7971 }
7972
// The two halves occupy disjoint bit ranges, so the OR may carry the
// disjoint flag.
7973 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7974 MI.eraseFromParent();
7975 return Legalized;
7976}
7977
// Top-level funnel-shift lowering: prefer converting to the reverse funnel
// shift when that one would itself be lowered anyway is NOT the case (i.e.
// when the reverse op is genuinely supported); otherwise fall back to plain
// shifts. Signature line missing from this view.
7980 // These operations approximately do the following (while avoiding undefined
7981 // shifts by BW):
7982 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7983 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7984 Register Dst = MI.getOperand(0).getReg();
7985 LLT Ty = MRI.getType(Dst);
7986 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7987
7988 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7989 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7990
7991 // TODO: Use smarter heuristic that accounts for vector legalization.
// If the reverse op would just be lowered again, go straight to shifts to
// avoid an infinite lower-to-reverse loop.
7992 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7993 return lowerFunnelShiftAsShifts(MI);
7994
7995 // This only works for powers of 2, fallback to shifts if it fails.
7996 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7997 if (Result == UnableToLegalize)
7998 return lowerFunnelShiftAsShifts(MI);
7999 return Result;
8000}
8001
// Lower a too-wide extension by introducing an intermediate extend of twice
// the source element size, then unmerging, extending the halves, and
// re-merging. Signature line missing from this view.
8003 auto [Dst, Src] = MI.getFirst2Regs();
8004 LLT DstTy = MRI.getType(Dst);
8005 LLT SrcTy = MRI.getType(Src);
8006
8007 uint32_t DstTySize = DstTy.getSizeInBits();
8008 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
8009 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8010
// The halving/doubling strategy below only works for power-of-two sizes.
8011 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
8012 !isPowerOf2_32(SrcTyScalarSize))
8013 return UnableToLegalize;
8014
8015 // The step between extend is too large, split it by creating an intermediate
8016 // extend instruction
8017 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8018 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
8019 // If the destination type is illegal, split it into multiple statements
8020 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
8021 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
8022 // Unmerge the vector
// NOTE(review): the element-count expression (presumably
// MidTy.getElementCount().divideCoefficientBy(2)) was dropped by the
// extraction on the next line — confirm against source.
8023 LLT EltTy = MidTy.changeElementCount(
8025 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
8026
8027 // ZExt the vectors
// NOTE(review): likewise the halved element count for ZExtResTy is on a
// dropped line — confirm against source.
8028 LLT ZExtResTy = DstTy.changeElementCount(
8030 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8031 {UnmergeSrc.getReg(0)});
8032 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
8033 {UnmergeSrc.getReg(1)});
8034
8035 // Merge the ending vectors
8036 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8037
8038 MI.eraseFromParent();
8039 return Legalized;
8040 }
8041 return UnableToLegalize;
8042}
8043
// Lower a too-narrow vector truncate by splitting the source in half,
// truncating each half to an intermediate element size, concatenating, and
// truncating again if needed. Signature line missing from this view.
8045 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
8046 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
8047 // Similar to how operand splitting is done in SelectiondDAG, we can handle
8048 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
8049 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
8050 // %lo16(<4 x s16>) = G_TRUNC %inlo
8051 // %hi16(<4 x s16>) = G_TRUNC %inhi
8052 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
8053 // %res(<8 x s8>) = G_TRUNC %in16
8054
8055 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
8056
8057 Register DstReg = MI.getOperand(0).getReg();
8058 Register SrcReg = MI.getOperand(1).getReg();
8059 LLT DstTy = MRI.getType(DstReg);
8060 LLT SrcTy = MRI.getType(SrcReg);
8061
// NOTE(review): part of this condition (presumably a check on DstTy's
// scalar size and SrcTy.isVector()) is on a dropped line — confirm.
8062 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
8064 isPowerOf2_32(SrcTy.getNumElements()) &&
8065 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
8066 // Split input type.
8067 LLT SplitSrcTy = SrcTy.changeElementCount(
8068 SrcTy.getElementCount().divideCoefficientBy(2));
8069
8070 // First, split the source into two smaller vectors.
8071 SmallVector<Register, 2> SplitSrcs;
8072 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
8073
8074 // Truncate the splits into intermediate narrower elements.
// Never truncate by more than half per step; a final trunc below
// finishes the job when the ratio is larger than 2.
8075 LLT InterTy;
8076 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8077 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
8078 else
8079 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
8080 for (Register &Src : SplitSrcs)
8081 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8082
8083 // Combine the new truncates into one vector
8084 auto Merge = MIRBuilder.buildMergeLikeInstr(
8085 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
8086
8087 // Truncate the new vector to the final result type
8088 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8089 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
8090 else
8091 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
8092
8093 MI.eraseFromParent();
8094
8095 return Legalized;
8096 }
8097 return UnableToLegalize;
8098}
8099
// Lower a rotate by emitting the opposite-direction rotate with a negated
// amount: rotl(x, a) == rotr(x, -a) for power-of-two widths (the caller
// checks that precondition). Signature line missing from this view.
8102 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8103 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8104 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8105 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8106 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8107 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8108 MI.eraseFromParent();
8109 return Legalized;
8110}
8111
// Lower a rotate, in order of preference: reverse rotate, funnel shift
// (same or reverse direction), and finally a shift/or expansion that avoids
// out-of-range shift amounts. Signature line missing from this view.
8113 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8114
8115 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8116 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8117
8118 MIRBuilder.setInstrAndDebugLoc(MI);
8119
8120 // If a rotate in the other direction is supported, use it.
8121 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8122 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8123 isPowerOf2_32(EltSizeInBits))
8124 return lowerRotateWithReverseRotate(MI);
8125
8126 // If a funnel shift is supported, use it.
// A rotate is a funnel shift with both data operands equal to Src.
8127 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8128 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8129 bool IsFShLegal = false;
8130 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8131 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8132 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8133 Register R3) {
8134 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8135 MI.eraseFromParent();
8136 return Legalized;
8137 };
8138 // If a funnel shift in the other direction is supported, use it.
8139 if (IsFShLegal) {
8140 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8141 } else if (isPowerOf2_32(EltSizeInBits)) {
// Negating the amount converts to the reverse-direction funnel shift;
// valid only for power-of-two element widths.
8142 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8143 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8144 }
8145 }
8146
8147 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8148 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8149 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8150 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8151 Register ShVal;
8152 Register RevShiftVal;
8153 if (isPowerOf2_32(EltSizeInBits)) {
8154 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8155 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8156 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8157 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8158 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8159 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8160 RevShiftVal =
8161 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8162 } else {
8163 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8164 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
// The extra shift-by-one keeps the reverse shift amount strictly below
// the bit width even when c % w == 0.
8165 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8166 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8167 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8168 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8169 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8170 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8171 RevShiftVal =
8172 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8173 }
// The two shifted values occupy disjoint bit ranges.
8174 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal, MachineInstr::Disjoint);
8175 MI.eraseFromParent();
8176 return Legalized;
8177}
8178
8179// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8180// representation.
// Builds the float manually: exponent from the leading-zero count, mantissa
// from the normalized top bits, and round-to-nearest-even from the discarded
// low 40 bits. Signature line missing from this view.
8183 auto [Dst, Src] = MI.getFirst2Regs();
8184 const LLT S64 = LLT::scalar(64);
8185 const LLT S32 = LLT::scalar(32);
8186 const LLT S1 = LLT::scalar(1);
8187
8188 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8189
8190 // unsigned cul2f(ulong u) {
8191 // uint lz = clz(u);
8192 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8193 // u = (u << lz) & 0x7fffffffffffffffUL;
8194 // ulong t = u & 0xffffffffffUL;
8195 // uint v = (e << 23) | (uint)(u >> 40);
8196 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8197 // return as_float(v + r);
8198 // }
8199
8200 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8201 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8202
// Src != 0 is handled by the select on E below, so ZERO_UNDEF is fine.
8203 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8204
// Biased exponent: 127 (IEEE bias) + 63 (MSB index) - leading zeros.
8205 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8206 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8207
8208 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8209 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8210
// Normalize: shift the MSB out (implicit leading 1) and clear the top bit.
8211 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8212 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8213
8214 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8215
// T holds the 40 discarded low bits, used only for rounding.
8216 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8217 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8218
// Assemble exponent and 23-bit mantissa into the raw float bits.
8219 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8220 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8221 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8222
// Round to nearest, ties to even: add 1 if remainder > half, add V's LSB
// if remainder == exactly half.
8223 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8224 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8225 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8226 auto One = MIRBuilder.buildConstant(S32, 1);
8227
8228 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8229 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8230 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
// The result register is the integer bit pattern of the float; presumably
// the destination's float-ness is purely a matter of register class here.
8231 MIRBuilder.buildAdd(Dst, V, R);
8232
8233 MI.eraseFromParent();
8234 return Legalized;
8235}
8236
8237// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8238// operations and G_SITOFP
8241 auto [Dst, Src] = MI.getFirst2Regs();
8242 const LLT S64 = LLT::scalar(64);
8243 const LLT S32 = LLT::scalar(32);
8244 const LLT S1 = LLT::scalar(1);
8245
8246 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8247
8248 // For i64 < INT_MAX we simply reuse SITOFP.
8249 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8250 // saved before division, convert to float by SITOFP, multiply the result
8251 // by 2.
8252 auto One = MIRBuilder.buildConstant(S64, 1);
8253 auto Zero = MIRBuilder.buildConstant(S64, 0);
8254 // Result if Src < INT_MAX
8255 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8256 // Result if Src >= INT_MAX
8257 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8258 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8259 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8260 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8261 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8262 // Check if the original value is larger than INT_MAX by comparing with
8263 // zero to pick one of the two conversions.
8264 auto IsLarge =
8265 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8266 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8267
8268 MI.eraseFromParent();
8269 return Legalized;
8270}
8271
// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
// IEEE double representation.
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

  // We create double value from 32 bit parts with 32 exponent difference.
  // Note that + and - are float operations that adjust the implicit leading
  // one, the bases 2^52 and 2^84 are for illustrative purposes.
  //
  // X = 2^52 * 1.0...LowBits
  // Y = 2^84 * 1.0...HighBits
  // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
  //         = - 2^52 * 1.0...HighBits
  // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits

  // Bit patterns of the doubles 2^52 and 2^84 (exponent fields only,
  // mantissa zero), used as integer constants to be OR'd with the halves.
  auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
  auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
  // 2^84 + 2^52 as a double; subtracting it once cancels both bases.
  auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
  auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
  auto HalfWidth = MIRBuilder.buildConstant(S64, 32);

  // OR each 32-bit half of Src directly into the mantissa of 2^52 / 2^84.
  auto LowBits = MIRBuilder.buildTrunc(S32, Src);
  LowBits = MIRBuilder.buildZExt(S64, LowBits);
  auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
  auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
  auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
  auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
  MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);

  MI.eraseFromParent();
  return Legalized;
}
8308
8309/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8310/// convert fpround f64->f16 without double-rounding, so we manually perform the
8311/// lowering here where we know it is valid.
8314 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8315 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8316 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8317 : MIRBuilder.buildSITOFP(SrcTy, Src);
8318 LLT S32Ty = SrcTy.changeElementSize(32);
8319 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8320 MIRBuilder.buildFPTrunc(Dst, M2);
8321 MI.eraseFromParent();
8323}
8324
8326 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8327
8328 if (SrcTy == LLT::scalar(1)) {
8329 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8330 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8331 MIRBuilder.buildSelect(Dst, Src, True, False);
8332 MI.eraseFromParent();
8333 return Legalized;
8334 }
8335
8336 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8337 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8338
8339 if (SrcTy != LLT::scalar(64))
8340 return UnableToLegalize;
8341
8342 if (DstTy == LLT::scalar(32))
8343 // TODO: SelectionDAG has several alternative expansions to port which may
8344 // be more reasonable depending on the available instructions. We also need
8345 // a more advanced mechanism to choose an optimal version depending on
8346 // target features such as sitofp or CTLZ availability.
8348
8349 if (DstTy == LLT::scalar(64))
8351
8352 return UnableToLegalize;
8353}
8354
8356 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8357
8358 const LLT S64 = LLT::scalar(64);
8359 const LLT S32 = LLT::scalar(32);
8360 const LLT S1 = LLT::scalar(1);
8361
8362 if (SrcTy == S1) {
8363 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8364 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8365 MIRBuilder.buildSelect(Dst, Src, True, False);
8366 MI.eraseFromParent();
8367 return Legalized;
8368 }
8369
8370 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8371 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8372
8373 if (SrcTy != S64)
8374 return UnableToLegalize;
8375
8376 if (DstTy == S32) {
8377 // signed cl2f(long l) {
8378 // long s = l >> 63;
8379 // float r = cul2f((l + s) ^ s);
8380 // return s ? -r : r;
8381 // }
8382 Register L = Src;
8383 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8384 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8385
8386 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8387 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8388 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8389
8390 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8391 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8392 MIRBuilder.buildConstant(S64, 0));
8393 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8394 MI.eraseFromParent();
8395 return Legalized;
8396 }
8397
8398 return UnableToLegalize;
8399}
8400
8402 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8403 const LLT S64 = LLT::scalar(64);
8404 const LLT S32 = LLT::scalar(32);
8405
8406 if (SrcTy != S64 && SrcTy != S32)
8407 return UnableToLegalize;
8408 if (DstTy != S32 && DstTy != S64)
8409 return UnableToLegalize;
8410
8411 // FPTOSI gives same result as FPTOUI for positive signed integers.
8412 // FPTOUI needs to deal with fp values that convert to unsigned integers
8413 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8414
8415 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8416 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8418 APInt::getZero(SrcTy.getSizeInBits()));
8419 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8420
8421 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8422
8423 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8424 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8425 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8426 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8427 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8428 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8429 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8430
8431 const LLT S1 = LLT::scalar(1);
8432
8433 MachineInstrBuilder FCMP =
8434 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8435 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8436
8437 MI.eraseFromParent();
8438 return Legalized;
8439}
8440
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Extract the biased exponent field (bits 23..30 of the f32 encoding).
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Smear the sign bit across the value (0 or -1 after the ashr), then
  // sign-extend it to the destination width.
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  // Mantissa with the implicit leading one (0x00800000) made explicit.
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Unbias the exponent (f32 bias is 127) and compute the shift distance of
  // the mantissa in each direction.
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  // Shift left when the value has more integer bits than mantissa bits,
  // right otherwise.
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (r ^ s) - s negates r exactly when s == -1.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // A negative unbiased exponent means |value| < 1.0, which truncates to 0.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}
8504
8507 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8508
8509 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8510 unsigned SatWidth = DstTy.getScalarSizeInBits();
8511
8512 // Determine minimum and maximum integer values and their corresponding
8513 // floating-point values.
8514 APInt MinInt, MaxInt;
8515 if (IsSigned) {
8516 MinInt = APInt::getSignedMinValue(SatWidth);
8517 MaxInt = APInt::getSignedMaxValue(SatWidth);
8518 } else {
8519 MinInt = APInt::getMinValue(SatWidth);
8520 MaxInt = APInt::getMaxValue(SatWidth);
8521 }
8522
8523 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8524 APFloat MinFloat(Semantics);
8525 APFloat MaxFloat(Semantics);
8526
8527 APFloat::opStatus MinStatus =
8528 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8529 APFloat::opStatus MaxStatus =
8530 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8531 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8532 !(MaxStatus & APFloat::opStatus::opInexact);
8533
8534 // If the integer bounds are exactly representable as floats, emit a
8535 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8536 // and selects.
8537 if (AreExactFloatBounds) {
8538 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8539 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8540 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8541 SrcTy.changeElementSize(1), Src, MaxC);
8542 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8543 // Clamp by MaxFloat from above. NaN cannot occur.
8544 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8545 auto MinP =
8546 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8548 auto Min =
8549 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8550 // Convert clamped value to integer. In the unsigned case we're done,
8551 // because we mapped NaN to MinFloat, which will cast to zero.
8552 if (!IsSigned) {
8553 MIRBuilder.buildFPTOUI(Dst, Min);
8554 MI.eraseFromParent();
8555 return Legalized;
8556 }
8557
8558 // Otherwise, select 0 if Src is NaN.
8559 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8560 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8561 DstTy.changeElementSize(1), Src, Src);
8562 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8563 FpToInt);
8564 MI.eraseFromParent();
8565 return Legalized;
8566 }
8567
8568 // Result of direct conversion. The assumption here is that the operation is
8569 // non-trapping and it's fine to apply it to an out-of-range value if we
8570 // select it away later.
8571 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8572 : MIRBuilder.buildFPTOUI(DstTy, Src);
8573
8574 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8575 // MinInt if Src is NaN.
8576 auto ULT =
8577 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8578 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8579 auto Max = MIRBuilder.buildSelect(
8580 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8581 // If Src OGT MaxFloat, select MaxInt.
8582 auto OGT =
8583 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8584 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8585
8586 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8587 // is already zero.
8588 if (!IsSigned) {
8589 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8590 Max);
8591 MI.eraseFromParent();
8592 return Legalized;
8593 }
8594
8595 // Otherwise, select 0 if Src is NaN.
8596 auto Min = MIRBuilder.buildSelect(
8597 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8598 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8599 DstTy.changeElementSize(1), Src, Src);
8600 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8601 MI.eraseFromParent();
8602 return Legalized;
8603}
8604
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);

  auto [Dst, Src] = MI.getFirst2Regs();
  assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
         MRI.getType(Src).getScalarType() == LLT::scalar(64));

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  // With afn the double rounding through f32 is acceptable.
  if (MI.getFlag(MachineInstr::FmAfn)) {
    unsigned Flags = MI.getFlags();
    auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
    MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
    MI.eraseFromParent();
    return Legalized;
  }

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;

  // Split the f64 bits into low (U) and high (UH) 32-bit halves.
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // E = biased f64 exponent, rebased for f16.
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  // M = the top 11 mantissa bits, positioned for the f16 mantissa with one
  // extra rounding bit below it.
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // Collect every mantissa bit that falls below those 11 bits into a single
  // sticky bit.
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // I = the f16 bit pattern for inf (mantissa == 0) or a quiet NaN:
  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // Normal-path candidate: N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // Denormal shift amount: B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // Denormal-path candidate: make the implicit one explicit, shift right by
  // B, and OR in a sticky bit if any bits were shifted out.
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // E < 1 means the result is denormal in f16.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest even using the two guard/sticky bits in VLow3.
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // E > 30 overflows the f16 exponent range: return inf.
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 is the rebased all-ones f64 exponent (2047 - 1023 + 15),
  // i.e. the input was inf or NaN: return I computed above.
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
8720
8723 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8724 const LLT S64 = LLT::scalar(64);
8725 const LLT S16 = LLT::scalar(16);
8726
8727 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8729
8730 return UnableToLegalize;
8731}
8732
8734 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8735 LLT Ty = MRI.getType(Dst);
8736
8737 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8738 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8739 MI.eraseFromParent();
8740 return Legalized;
8741}
8742
8744 auto [DstFrac, DstInt, Src] = MI.getFirst3Regs();
8745 LLT Ty = MRI.getType(Src);
8746 auto Flags = MI.getFlags();
8747
8748 auto IntPart = MIRBuilder.buildIntrinsicTrunc(Ty, Src, Flags);
8749 auto FracPart = MIRBuilder.buildFSub(Ty, Src, IntPart, Flags);
8750
8751 Register FracToUse;
8752 if (MI.getFlag(MachineInstr::FmNoInfs)) {
8753 FracToUse = FracPart.getReg(0);
8754 } else {
8755 auto Abs = MIRBuilder.buildFAbs(Ty, Src, Flags);
8756 const fltSemantics &Semantics = getFltSemanticForLLT(Ty.getScalarType());
8757 auto Inf = MIRBuilder.buildFConstant(Ty, APFloat::getInf(Semantics));
8758 auto IsInf = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ,
8759 Ty.changeElementSize(1), Abs, Inf);
8760 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8761 auto Select = MIRBuilder.buildSelect(Ty, IsInf, Zero, FracPart);
8762 FracToUse = Select.getReg(0);
8763 }
8764
8765 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8766 MIRBuilder.buildCopy(DstInt, IntPart.getReg(0));
8767
8768 MI.eraseFromParent();
8769 return Legalized;
8770}
8771
8773 switch (Opc) {
8774 case TargetOpcode::G_SMIN:
8775 return CmpInst::ICMP_SLT;
8776 case TargetOpcode::G_SMAX:
8777 return CmpInst::ICMP_SGT;
8778 case TargetOpcode::G_UMIN:
8779 return CmpInst::ICMP_ULT;
8780 case TargetOpcode::G_UMAX:
8781 return CmpInst::ICMP_UGT;
8782 default:
8783 llvm_unreachable("not in integer min/max");
8784 }
8785}
8786
8788 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8789
8790 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8791 LLT CmpType = MRI.getType(Dst).changeElementType(LLT::scalar(1));
8792
8793 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8794 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8795
8796 MI.eraseFromParent();
8797 return Legalized;
8798}
8799
8802 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8803
8804 Register Dst = Cmp->getReg(0);
8805 LLT DstTy = MRI.getType(Dst);
8806 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8807 LLT CmpTy = DstTy.changeElementSize(1);
8808
8809 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8812 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8815
8816 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8817 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8818 Cmp->getRHSReg());
8819 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8820 Cmp->getRHSReg());
8821
8822 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8823 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8824 if (TLI.preferSelectsOverBooleanArithmetic(
8825 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8827 auto One = MIRBuilder.buildConstant(DstTy, 1);
8828 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8829
8830 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8831 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8832 } else {
8834 std::swap(IsGT, IsLT);
8835 // Extend boolean results to DstTy, which is at least i2, before subtracting
8836 // them.
8837 unsigned BoolExtOp =
8838 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8839 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8840 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8841 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8842 }
8843
8844 MI.eraseFromParent();
8845 return Legalized;
8846}
8847
8850 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8851 const int Src0Size = Src0Ty.getScalarSizeInBits();
8852 const int Src1Size = Src1Ty.getScalarSizeInBits();
8853
8854 auto SignBitMask = MIRBuilder.buildConstant(
8855 Src0Ty, APInt::getSignMask(Src0Size));
8856
8857 auto NotSignBitMask = MIRBuilder.buildConstant(
8858 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8859
8860 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8861 Register And1;
8862 if (Src0Ty == Src1Ty) {
8863 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8864 } else if (Src0Size > Src1Size) {
8865 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8866 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8867 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8868 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8869 } else {
8870 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8871 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8872 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8873 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8874 }
8875
8876 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8877 // constants are a nan and -0.0, but the final result should preserve
8878 // everything.
8879 unsigned Flags = MI.getFlags();
8880
8881 // We masked the sign bit and the not-sign bit, so these are disjoint.
8882 Flags |= MachineInstr::Disjoint;
8883
8884 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8885
8886 MI.eraseFromParent();
8887 return Legalized;
8888}
8889
8892 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8893 // identical handling. fminimumnum/fmaximumnum also need a path that do not
8894 // depend on fminnum/fmaxnum.
8895
8896 unsigned NewOp;
8897 switch (MI.getOpcode()) {
8898 case TargetOpcode::G_FMINNUM:
8899 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8900 break;
8901 case TargetOpcode::G_FMINIMUMNUM:
8902 NewOp = TargetOpcode::G_FMINNUM;
8903 break;
8904 case TargetOpcode::G_FMAXNUM:
8905 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8906 break;
8907 case TargetOpcode::G_FMAXIMUMNUM:
8908 NewOp = TargetOpcode::G_FMAXNUM;
8909 break;
8910 default:
8911 llvm_unreachable("unexpected min/max opcode");
8912 }
8913
8914 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8915 LLT Ty = MRI.getType(Dst);
8916
8917 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8918 // Insert canonicalizes if it's possible we need to quiet to get correct
8919 // sNaN behavior.
8920
8921 // Note this must be done here, and not as an optimization combine in the
8922 // absence of a dedicate quiet-snan instruction as we're using an
8923 // omni-purpose G_FCANONICALIZE.
8924 if (!isKnownNeverSNaN(Src0, MRI))
8925 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8926
8927 if (!isKnownNeverSNaN(Src1, MRI))
8928 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8929 }
8930
8931 // If there are no nans, it's safe to simply replace this with the non-IEEE
8932 // version.
8933 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8934 MI.eraseFromParent();
8935 return Legalized;
8936}
8937
8940 unsigned Opc = MI.getOpcode();
8941 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8942 LLT Ty = MRI.getType(Dst);
8943 LLT CmpTy = Ty.changeElementSize(1);
8944
8945 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8946 unsigned OpcIeee =
8947 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8948 unsigned OpcNonIeee =
8949 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8950 bool MinMaxMustRespectOrderedZero = false;
8951 Register Res;
8952
8953 // IEEE variants don't need canonicalization
8954 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8955 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8956 MinMaxMustRespectOrderedZero = true;
8957 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8958 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8959 } else {
8960 auto Compare = MIRBuilder.buildFCmp(
8961 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8962 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8963 }
8964
8965 // Propagate any NaN of both operands
8966 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8967 (!isKnownNeverNaN(Src0, MRI) || isKnownNeverNaN(Src1, MRI))) {
8968 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8969
8970 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8971 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8972 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8973 if (Ty.isVector())
8974 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8975
8976 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8977 }
8978
8979 // fminimum/fmaximum requires -0.0 less than +0.0
8980 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8981 GISelValueTracking VT(MIRBuilder.getMF());
8982 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8983 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8984
8985 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8986 const unsigned Flags = MI.getFlags();
8987 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8988 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8989
8990 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8991
8992 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8993 auto LHSSelect =
8994 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8995
8996 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8997 auto RHSSelect =
8998 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8999
9000 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9001 }
9002 }
9003
9004 MIRBuilder.buildCopy(Dst, Res);
9005 MI.eraseFromParent();
9006 return Legalized;
9007}
9008
9010 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
9011 Register DstReg = MI.getOperand(0).getReg();
9012 LLT Ty = MRI.getType(DstReg);
9013 unsigned Flags = MI.getFlags();
9014
9015 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
9016 Flags);
9017 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
9018 MI.eraseFromParent();
9019 return Legalized;
9020}
9021
9024 auto [DstReg, X] = MI.getFirst2Regs();
9025 const unsigned Flags = MI.getFlags();
9026 const LLT Ty = MRI.getType(DstReg);
9027 const LLT CondTy = Ty.changeElementSize(1);
9028
9029 // round(x) =>
9030 // t = trunc(x);
9031 // d = fabs(x - t);
9032 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
9033 // return t + o;
9034
9035 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
9036
9037 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
9038 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
9039
9040 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
9041 auto Cmp =
9042 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
9043
9044 // Could emit G_UITOFP instead
9045 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
9046 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9047 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9048 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
9049
9050 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
9051
9052 MI.eraseFromParent();
9053 return Legalized;
9054}
9055
9057 auto [DstReg, SrcReg] = MI.getFirst2Regs();
9058 unsigned Flags = MI.getFlags();
9059 LLT Ty = MRI.getType(DstReg);
9060 const LLT CondTy = Ty.changeElementSize(1);
9061
9062 // result = trunc(src);
9063 // if (src < 0.0 && src != result)
9064 // result += -1.0.
9065
9066 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9067 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
9068
9069 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
9070 SrcReg, Zero, Flags);
9071 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
9072 SrcReg, Trunc, Flags);
9073 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
9074 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
9075
9076 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9077 MI.eraseFromParent();
9078 return Legalized;
9079}
9080
// NOTE(review): signature lines (orig. 9081-9082) lost in extraction; this
// body lowers a merge of scalar parts into one wide value (G_MERGE_VALUES
// style — confirm). Each source part is zero-extended to the full width,
// shifted to its bit offset, and OR'd into the accumulated result.
9083 const unsigned NumOps = MI.getNumOperands();
9084 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
9085 unsigned PartSize = Src0Ty.getSizeInBits();
9086
9087 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
9088 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
9089
// Operand 0 is the def; operand 1 was handled above as the low part, so
// start folding in sources from operand 2.
9090 for (unsigned I = 2; I != NumOps; ++I) {
9091 const unsigned Offset = (I - 1) * PartSize;
9092
9093 Register SrcReg = MI.getOperand(I).getReg();
9094 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
9095
// Write the final OR directly into DstReg when possible; otherwise build
// into a fresh temp (e.g. when the destination is a pointer).
9096 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
9097 MRI.createGenericVirtualRegister(WideTy);
9098
9099 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
9100 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9101 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9102 ResultReg = NextResult;
9103 }
9104
9105 if (DstTy.isPointer()) {
// Reinterpreting an integer as a pointer is invalid in non-integral
// address spaces, so bail out rather than emit a bogus inttoptr.
9106 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9107 DstTy.getAddressSpace())) {
9108 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
9109 return UnableToLegalize;
9110 }
9111
9112 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9113 }
9114
9115 MI.eraseFromParent();
9116 return Legalized;
9117}
9118
// NOTE(review): signature lines (orig. 9119-9120) lost in extraction; this
// body lowers an unmerge (G_UNMERGE_VALUES style — confirm) by bitcasting
// the source to an integer and extracting each destination with a
// right-shift plus truncate.
9121 const unsigned NumDst = MI.getNumOperands() - 1;
9122 Register SrcReg = MI.getOperand(NumDst).getReg();
9123 Register Dst0Reg = MI.getOperand(0).getReg();
9124 LLT DstTy = MRI.getType(Dst0Reg);
9125 if (DstTy.isPointer())
9126 return UnableToLegalize; // TODO
9127
9128 SrcReg = coerceToScalar(SrcReg);
9129 if (!SrcReg)
9130 return UnableToLegalize;
9131
9132 // Expand scalarizing unmerge as bitcast to integer and shift.
9133 LLT IntTy = MRI.getType(SrcReg);
9134
// Destination 0 is the low bits: a plain truncate, no shift needed.
9135 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9136
9137 const unsigned DstSize = DstTy.getSizeInBits();
9138 unsigned Offset = DstSize;
9139 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9140 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9141 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9142 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9143 }
9144
9145 MI.eraseFromParent();
9146 return Legalized;
9147}
9148
9149/// Lower a vector extract or insert by writing the vector to a stack temporary
9150/// and reloading the element or vector.
9151///
9152/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9153/// =>
9154/// %stack_temp = G_FRAME_INDEX
9155/// G_STORE %vec, %stack_temp
9156/// %idx = clamp(%idx, %vec.getNumElements())
9157/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9158/// %dst = G_LOAD %element_ptr
// NOTE(review): signature lines (orig. 9159-9160) lost in extraction; per
// the preceding doc comment this lowers G_EXTRACT_VECTOR_ELT /
// G_INSERT_VECTOR_ELT by spilling the vector to a stack temporary and
// loading/storing the addressed element.
9161 Register DstReg = MI.getOperand(0).getReg();
9162 Register SrcVec = MI.getOperand(1).getReg();
9163 Register InsertVal;
9164 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9165 InsertVal = MI.getOperand(2).getReg();
9166
// The index is always the last operand for both opcodes.
9167 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9168
9169 LLT VecTy = MRI.getType(SrcVec);
9170 LLT EltTy = VecTy.getElementType();
9171 unsigned NumElts = VecTy.getNumElements();
9172
9173 int64_t IdxVal;
// Fast path: constant in-range index — split the vector into scalar parts
// and pick/replace the one element, avoiding the stack round-trip.
// (NOTE(review): the SrcRegs declaration line, orig. 9175, was dropped by
// extraction.)
9174 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9176 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9177
9178 if (InsertVal) {
9179 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9180 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9181 } else {
9182 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9183 }
9184
9185 MI.eraseFromParent();
9186 return Legalized;
9187 }
9188
// The stack path computes byte offsets, so sub-byte elements can't be
// addressed.
9189 if (!EltTy.isByteSized()) { // Not implemented.
9190 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9191 return UnableToLegalize;
9192 }
9193
9194 unsigned EltBytes = EltTy.getSizeInBytes();
9195 Align VecAlign = getStackTemporaryAlignment(VecTy);
9196 Align EltAlign;
9197
9198 MachinePointerInfo PtrInfo;
9199 auto StackTemp = createStackTemporary(
9200 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9201 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9202
9203 // Get the pointer to the element, and be sure not to hit undefined behavior
9204 // if the index is out of bounds.
9205 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9206
9207 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
// Constant index: we can keep precise pointer info and alignment for the
// element access.
9208 int64_t Offset = IdxVal * EltBytes;
9209 PtrInfo = PtrInfo.getWithOffset(Offset);
9210 EltAlign = commonAlignment(VecAlign, Offset);
9211 } else {
9212 // We lose information with a variable offset.
9213 EltAlign = getStackTemporaryAlignment(EltTy);
9214 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9215 }
9216
9217 if (InsertVal) {
9218 // Write the inserted element
9219 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9220
9221 // Reload the whole vector.
9222 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9223 } else {
9224 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9225 }
9226
9227 MI.eraseFromParent();
9228 return Legalized;
9229}
9230
// NOTE(review): signature lines (orig. 9231-9232) lost in extraction; this
// body lowers G_SHUFFLE_VECTOR by extracting each selected source element
// and rebuilding the destination with G_BUILD_VECTOR. (The SmallVector
// BuildVec / Register Undef declarations, orig. 9238-9239, were dropped by
// extraction.)
9233 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9234 MI.getFirst3RegLLTs();
9235 LLT IdxTy = LLT::scalar(32);
9236
9237 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9240 LLT EltTy = DstTy.getScalarType();
9241
// Reuse one extract per distinct mask index instead of emitting duplicates.
9242 DenseMap<unsigned, Register> CachedExtract;
9243
9244 for (int Idx : Mask) {
// Negative mask entries mean "don't care": share a single undef element.
9245 if (Idx < 0) {
9246 if (!Undef.isValid())
9247 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9248 BuildVec.push_back(Undef);
9249 continue;
9250 }
9251
9252 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9253
// Mask indices >= NumElts address the second source vector.
9254 int NumElts = Src0Ty.getNumElements();
9255 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9256 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9257 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9258 if (Inserted) {
9259 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9260 It->second =
9261 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9262 }
9263 BuildVec.push_back(It->second);
9264 }
9265
9266 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9267 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9268 MI.eraseFromParent();
9269 return Legalized;
9270}
9271
// NOTE(review): signature lines (orig. 9272-9273) lost in extraction; per
// the report_fatal_error text this lowers masked compress (vector.compress
// style) through a stack temporary: selected elements are stored to
// consecutive slots, then the whole vector is reloaded.
9274 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9275 MI.getFirst4RegLLTs();
9276
9277 if (VecTy.isScalableVector())
9278 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9279
9280 Align VecAlign = getStackTemporaryAlignment(VecTy);
9281 MachinePointerInfo PtrInfo;
9282 Register StackPtr =
9283 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9284 PtrInfo)
9285 .getReg(0);
// NOTE(review): initializer for ValPtrInfo (orig. 9287) was dropped by
// extraction.
9286 MachinePointerInfo ValPtrInfo =
9288
9289 LLT IdxTy = LLT::scalar(32);
9290 LLT ValTy = VecTy.getElementType();
9291 Align ValAlign = getStackTemporaryAlignment(ValTy);
9292
// OutPos tracks the next slot to fill; it only advances for enabled lanes.
9293 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9294
9295 bool HasPassthru =
9296 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9297
// Pre-fill the temporary with the passthru vector so unwritten tail slots
// get passthru values when reloaded.
9298 if (HasPassthru)
9299 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9300
9301 Register LastWriteVal;
9302 std::optional<APInt> PassthruSplatVal =
9303 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9304
9305 if (PassthruSplatVal.has_value()) {
9306 LastWriteVal =
9307 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9308 } else if (HasPassthru) {
// Non-splat passthru: the value that must follow the compressed run is the
// passthru element at index popcount(mask); load it from the temporary.
9309 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9310 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9311 {LLT::scalar(32)}, {Popcount});
9312
9313 Register LastElmtPtr =
9314 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9315 LastWriteVal =
9316 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9317 .getReg(0);
9318 }
9319
9320 unsigned NumElmts = VecTy.getNumElements();
9321 for (unsigned I = 0; I < NumElmts; ++I) {
9322 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9323 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
// Unconditionally store to slot OutPos; disabled lanes are simply
// overwritten by the next enabled lane because OutPos does not advance.
9324 Register ElmtPtr =
9325 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9326 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9327
9328 LLT MaskITy = MaskTy.getElementType();
9329 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9330 if (MaskITy.getSizeInBits() > 1)
9331 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9332
9333 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9334 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9335
// After the final lane, clamp OutPos into range and, if not every lane was
// selected, write the passthru-successor value to the next slot.
9336 if (HasPassthru && I == NumElmts - 1) {
9337 auto EndOfVector =
9338 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9339 auto AllLanesSelected = MIRBuilder.buildICmp(
9340 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9341 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9342 {OutPos, EndOfVector});
9343 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9344
9345 LastWriteVal =
9346 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9347 .getReg(0);
9348 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9349 }
9350 }
9351
9352 // TODO: Use StackPtr's FrameIndex alignment.
9353 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9354
9355 MI.eraseFromParent();
9356 return Legalized;
9357}
9358
// NOTE(review): the first signature line (orig. 9359, presumably
// "...getDynStackAllocTargetPtr(Register SPReg,") was lost in extraction.
// Computes the new stack pointer for a dynamic alloca: SP - AllocSize,
// rounded down to the requested alignment, returned as a pointer value.
9360 Register AllocSize,
9361 Align Alignment,
9362 LLT PtrTy) {
9363 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9364
9365 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9366 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9367
9368 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9369 // have to generate an extra instruction to negate the alloc and then use
9370 // G_PTR_ADD to add the negative offset.
9371 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9372 if (Alignment > Align(1)) {
// Mask off low bits: AND with -(alignment) rounds the address down, which
// is correct for a downward-growing stack.
9373 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9374 AlignMask.negate();
9375 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9376 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9377 }
9378
9379 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9380}
9381
// NOTE(review): signature lines (orig. 9382-9383) lost in extraction; this
// body lowers a dynamic stack allocation (G_DYN_STACKALLOC style —
// confirm): compute the new SP, update the SP register, and return the
// allocated pointer.
9384 const auto &MF = *MI.getMF();
9385 const auto &TFI = *MF.getSubtarget().getFrameLowering();
// The SP-subtraction scheme below only works when the stack grows down.
9386 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9387 return UnableToLegalize;
9388
9389 Register Dst = MI.getOperand(0).getReg();
9390 Register AllocSize = MI.getOperand(1).getReg();
9391 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9392
9393 LLT PtrTy = MRI.getType(Dst);
9394 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9395 Register SPTmp =
9396 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9397
// Commit the new stack pointer, and the same value is the alloca's result.
9398 MIRBuilder.buildCopy(SPReg, SPTmp);
9399 MIRBuilder.buildCopy(Dst, SPTmp);
9400
9401 MI.eraseFromParent();
9402 return Legalized;
9403}
9404
// NOTE(review): signature lines (orig. 9405-9406) lost in extraction; lowers
// a stack-save (G_STACKSAVE style — confirm) to a copy from the target's
// stack pointer register. Fails if the target exposes no SP register.
9407 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9408 if (!StackPtr)
9409 return UnableToLegalize;
9410
9411 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9412 MI.eraseFromParent();
9413 return Legalized;
9414}
9415
// NOTE(review): signature lines (orig. 9416-9417) lost in extraction; lowers
// a stack-restore (G_STACKRESTORE style — confirm) to a copy into the
// target's stack pointer register. Fails if the target exposes no SP
// register.
9418 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9419 if (!StackPtr)
9420 return UnableToLegalize;
9421
9422 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9423 MI.eraseFromParent();
9424 return Legalized;
9425}
9426
// NOTE(review): signature lines (orig. 9427-9428) lost in extraction; lowers
// G_EXTRACT (bit-offset sub-value extraction — confirm). First tries an
// element-aligned vector unmerge; otherwise falls back to integer
// shift+truncate.
9429 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9430 unsigned Offset = MI.getOperand(2).getImm();
9431
9432 // Extract sub-vector or one element
9433 if (SrcTy.isVector()) {
9434 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9435 unsigned DstSize = DstTy.getSizeInBits();
9436
// Only usable when both the offset and the extracted width fall on element
// boundaries and stay inside the source.
9437 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9438 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9439 // Unmerge and allow access to each Src element for the artifact combiner.
9440 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9441
9442 // Take element(s) we need to extract and copy it (merge them).
9443 SmallVector<Register, 8> SubVectorElts;
9444 for (unsigned Idx = Offset / SrcEltSize;
9445 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9446 SubVectorElts.push_back(Unmerge.getReg(Idx));
9447 }
9448 if (SubVectorElts.size() == 1)
9449 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9450 else
9451 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9452
9453 MI.eraseFromParent();
9454 return Legalized;
9455 }
9456 }
9457
// Integer fallback needs ptrtoint/inttoptr, which is invalid for
// non-integral address spaces.
9458 const DataLayout &DL = MIRBuilder.getDataLayout();
9459 if ((SrcTy.isPointer() &&
9460 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9461 (DstTy.isPointer() &&
9462 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9463 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9464 return UnableToLegalize;
9465 }
9466
9467 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9468 (SrcTy.isScalar() || SrcTy.isPointer() ||
9469 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
// Normalize the source to a plain integer so shifts are legal on it.
9470 LLT SrcIntTy = SrcTy;
9471 if (!SrcTy.isScalar()) {
9472 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9473 SrcReg = MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9474 }
9475
9476 Register ResultReg = DstReg;
9477 if (DstTy.isPointer())
9478 ResultReg =
9479 MRI.createGenericVirtualRegister(LLT::scalar(DstTy.getSizeInBits()));
9480
9481 if (Offset == 0)
9482 MIRBuilder.buildTrunc(ResultReg, SrcReg);
9483 else {
9484 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9485 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9486 MIRBuilder.buildTrunc(ResultReg, Shr);
9487 }
9488
9489 if (DstTy.isPointer())
9490 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
9491
9492 MI.eraseFromParent();
9493 return Legalized;
9494 }
9495
9496 return UnableToLegalize;
9497}
9498
// NOTE(review): signature line (orig. 9499) lost in extraction; lowers
// G_INSERT (inserting a sub-value at a bit offset — confirm). Uses an
// element-wise unmerge/merge when the insert is element-aligned in a
// vector, otherwise masks and ORs in integer form.
9500 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9501 uint64_t Offset = MI.getOperand(3).getImm();
9502
9503 LLT DstTy = MRI.getType(Src);
9504 LLT InsertTy = MRI.getType(InsertSrc);
9505
9506 const DataLayout &DL = MIRBuilder.getDataLayout();
9507 bool IsNonIntegralInsert =
9508 InsertTy.isPointerOrPointerVector() &&
9509 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace());
9510 bool IsNonIntegralDst = DstTy.isPointerOrPointerVector() &&
9511 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace());
9512
9513 // Insert sub-vector or one element
9514 if (DstTy.isVector()) {
9515 LLT EltTy = DstTy.getElementType();
9516
// Non-integral pointers may only be moved whole (as matching elements);
// anything requiring a cast is rejected.
9517 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9518 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9519 return UnableToLegalize;
9520 }
9521
9522 unsigned EltSize = EltTy.getSizeInBits();
9523 unsigned InsertSize = InsertTy.getSizeInBits();
9524
// Element-aligned fast path. (NOTE(review): the DstElts SmallVector
// declaration, orig. 9528, was dropped by extraction.)
9525 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9526 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9527 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9529 unsigned Idx = 0;
9530 // Elements from Src before insert start Offset
9531 for (; Idx < Offset / EltSize; ++Idx) {
9532 DstElts.push_back(UnmergeSrc.getReg(Idx));
9533 }
9534
9535 // Replace elements in Src with elements from InsertSrc
9536 if (InsertTy.getSizeInBits() > EltSize) {
9537 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9538 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9539 ++Idx, ++i) {
9540 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9541 }
9542 } else {
// Single-element insert: bridge any pointer/int mismatch between the
// inserted value and the vector's element type.
9543 if (InsertTy.isPointer() && !EltTy.isPointer())
9544 InsertSrc = MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9545 else if (!InsertTy.isPointer() && EltTy.isPointer())
9546 InsertSrc = MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9547 DstElts.push_back(InsertSrc);
9548 ++Idx;
9549 }
9550
9551 // Remaining elements from Src after insert
9552 for (; Idx < DstTy.getNumElements(); ++Idx) {
9553 DstElts.push_back(UnmergeSrc.getReg(Idx));
9554 }
9555
9556 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9557 MI.eraseFromParent();
9558 return Legalized;
9559 }
9560 }
9561
// Integer fallback can't handle vector inserts or mismatched vector
// element inserts.
9562 if (InsertTy.isVector() ||
9563 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9564 return UnableToLegalize;
9565
9566 if (IsNonIntegralDst || IsNonIntegralInsert) {
9567 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9568 return UnableToLegalize;
9569 }
9570
9571 LLT IntDstTy = DstTy;
9572
9573 if (!DstTy.isScalar()) {
9574 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9575 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9576 }
9577
9578 if (!InsertTy.isScalar()) {
9579 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9580 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9581 }
9582
// Position the inserted bits at Offset within the full-width value.
9583 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9584 if (Offset != 0) {
9585 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9586 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9587 }
9588
// Clear the destination's bits in the insert window, then OR the shifted
// value in. (NOTE(review): the start of the MaskVal initializer, orig.
// 9589 — presumably APInt::getBitsSet — was dropped by extraction.)
9590 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9591
9592 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9593 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9594 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9595
9596 MIRBuilder.buildCast(Dst, Or);
9597 MI.eraseFromParent();
9598 return Legalized;
9599}
9600
// NOTE(review): signature lines (orig. 9601-9602) lost in extraction; lowers
// G_SADDO / G_SSUBO: perform the plain add/sub, then derive the signed
// overflow flag from comparisons.
9603 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9604 MI.getFirst4RegLLTs();
9605 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9606
9607 LLT Ty = Dst0Ty;
9608 LLT BoolTy = Dst1Ty;
9609
// Build the arithmetic into a fresh register first so the overflow
// comparisons can use it before Dst0 is defined.
9610 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9611
9612 if (IsAdd)
9613 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9614 else
9615 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9616
9617 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9618
9619 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9620
9621 // For an addition, the result should be less than one of the operands (LHS)
9622 // if and only if the other operand (RHS) is negative, otherwise there will
9623 // be overflow.
9624 // For a subtraction, the result should be less than one of the operands
9625 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9626 // otherwise there will be overflow.
9627 auto ResultLowerThanLHS =
9628 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9629 auto ConditionRHS = MIRBuilder.buildICmp(
9630 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9631
// Overflow iff the two observations disagree.
9632 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9633
9634 MIRBuilder.buildCopy(Dst0, NewDst0);
9635 MI.eraseFromParent();
9636
9637 return Legalized;
9638}
9639
// NOTE(review): signature line (orig. 9640) lost in extraction; lowers a
// signed add-with-carry-in producing an overflow flag (G_SADDE style —
// confirm).
9641 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9642 const LLT Ty = MRI.getType(Res);
9643
9644 // sum = LHS + RHS + zext(CarryIn)
9645 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9646 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9647 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9648 MIRBuilder.buildCopy(Res, Sum);
9649
9650 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
// Signed overflow iff both operands have the same sign and the sum's sign
// differs — captured by the sign bit of (sum^lhs) & (sum^rhs).
9651 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9652 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9653 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9654
9655 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9656 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9657
9658 MI.eraseFromParent();
9659 return Legalized;
9660}
9661
// NOTE(review): signature line (orig. 9662) lost in extraction; lowers a
// signed subtract-with-borrow-in producing an overflow flag (G_SSUBE style
// — confirm).
9663 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9664 const LLT Ty = MRI.getType(Res);
9665
9666 // Diff = LHS - (RHS + zext(CarryIn))
9667 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9668 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9669 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9670 MIRBuilder.buildCopy(Res, Diff);
9671
9672 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
// Signed overflow iff the operands' signs differ and the result's sign
// differs from LHS — captured by the sign bit of (LHS^RHS) & (LHS^Diff).
9673 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9674 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9675 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9676 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9677 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9678
9679 MI.eraseFromParent();
9680 return Legalized;
9681}
9682
// NOTE(review): signature lines (orig. 9683-9684) lost in extraction; lowers
// the four saturating add/sub opcodes using min/max clamping of the RHS
// before a plain add/sub.
9685 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9686 LLT Ty = MRI.getType(Res);
9687 bool IsSigned;
9688 bool IsAdd;
9689 unsigned BaseOp;
9690 switch (MI.getOpcode()) {
9691 default:
9692 llvm_unreachable("unexpected addsat/subsat opcode");
9693 case TargetOpcode::G_UADDSAT:
9694 IsSigned = false;
9695 IsAdd = true;
9696 BaseOp = TargetOpcode::G_ADD;
9697 break;
9698 case TargetOpcode::G_SADDSAT:
9699 IsSigned = true;
9700 IsAdd = true;
9701 BaseOp = TargetOpcode::G_ADD;
9702 break;
9703 case TargetOpcode::G_USUBSAT:
9704 IsSigned = false;
9705 IsAdd = false;
9706 BaseOp = TargetOpcode::G_SUB;
9707 break;
9708 case TargetOpcode::G_SSUBSAT:
9709 IsSigned = true;
9710 IsAdd = false;
9711 BaseOp = TargetOpcode::G_SUB;
9712 break;
9713 }
9714
9715 if (IsSigned) {
9716 // sadd.sat(a, b) ->
9717 // hi = 0x7fffffff - smax(a, 0)
9718 // lo = 0x80000000 - smin(a, 0)
9719 // a + smin(smax(lo, b), hi)
9720 // ssub.sat(a, b) ->
9721 // lo = smax(a, -1) - 0x7fffffff
9722 // hi = smin(a, -1) - 0x80000000
9723 // a - smin(smax(lo, b), hi)
9724 // TODO: AMDGPU can use a "median of 3" instruction here:
9725 // a +/- med3(lo, b, hi)
// (NOTE(review): the Hi/Lo declaration line, orig. 9731, was dropped by
// extraction.)
9726 uint64_t NumBits = Ty.getScalarSizeInBits();
9727 auto MaxVal =
9728 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9729 auto MinVal =
9730 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9732 if (IsAdd) {
9733 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9734 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9735 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9736 } else {
9737 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9738 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9739 MaxVal);
9740 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9741 MinVal);
9742 }
// Clamp RHS into [Lo, Hi] so the final add/sub can never wrap.
9743 auto RHSClamped =
9744 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9745 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9746 } else {
9747 // uadd.sat(a, b) -> a + umin(~a, b)
9748 // usub.sat(a, b) -> a - umin(a, b)
9749 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9750 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9751 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9752 }
9753
9754 MI.eraseFromParent();
9755 return Legalized;
9756}
9757
// NOTE(review): signature lines (orig. 9758-9759) lost in extraction; lowers
// the four saturating add/sub opcodes via the corresponding
// overflow-producing op plus a select of the saturated value.
9760 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9761 LLT Ty = MRI.getType(Res);
9762 LLT BoolTy = Ty.changeElementSize(1);
9763 bool IsSigned;
9764 bool IsAdd;
9765 unsigned OverflowOp;
9766 switch (MI.getOpcode()) {
9767 default:
9768 llvm_unreachable("unexpected addsat/subsat opcode");
9769 case TargetOpcode::G_UADDSAT:
9770 IsSigned = false;
9771 IsAdd = true;
9772 OverflowOp = TargetOpcode::G_UADDO;
9773 break;
9774 case TargetOpcode::G_SADDSAT:
9775 IsSigned = true;
9776 IsAdd = true;
9777 OverflowOp = TargetOpcode::G_SADDO;
9778 break;
9779 case TargetOpcode::G_USUBSAT:
9780 IsSigned = false;
9781 IsAdd = false;
9782 OverflowOp = TargetOpcode::G_USUBO;
9783 break;
9784 case TargetOpcode::G_SSUBSAT:
9785 IsSigned = true;
9786 IsAdd = false;
9787 OverflowOp = TargetOpcode::G_SSUBO;
9788 break;
9789 }
9790
9791 auto OverflowRes =
9792 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9793 Register Tmp = OverflowRes.getReg(0);
9794 Register Ov = OverflowRes.getReg(1);
9795 MachineInstrBuilder Clamp;
9796 if (IsSigned) {
9797 // sadd.sat(a, b) ->
9798 // {tmp, ov} = saddo(a, b)
9799 // ov ? (tmp >>s 31) + 0x80000000 : r
9800 // ssub.sat(a, b) ->
9801 // {tmp, ov} = ssubo(a, b)
9802 // ov ? (tmp >>s 31) + 0x80000000 : r
// On signed overflow, saturate toward the sign of the (wrapped) result:
// sign-fill via arithmetic shift, then add INT_MIN to pick MIN or MAX.
9803 uint64_t NumBits = Ty.getScalarSizeInBits();
9804 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9805 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9806 auto MinVal =
9807 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9808 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9809 } else {
9810 // uadd.sat(a, b) ->
9811 // {tmp, ov} = uaddo(a, b)
9812 // ov ? 0xffffffff : tmp
9813 // usub.sat(a, b) ->
9814 // {tmp, ov} = usubo(a, b)
9815 // ov ? 0 : tmp
9816 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9817 }
9818 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9819
9820 MI.eraseFromParent();
9821 return Legalized;
9822}
9823
// NOTE(review): signature lines (orig. 9824-9825) lost in extraction; lowers
// G_SSHLSAT / G_USHLSAT: shift, then detect overflow by shifting back and
// comparing with the original value.
9826 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9827 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9828 "Expected shlsat opcode!");
9829 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9830 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9831 LLT Ty = MRI.getType(Res);
9832 LLT BoolTy = Ty.changeElementSize(1);
9833
9834 unsigned BW = Ty.getScalarSizeInBits();
9835 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
// Round-trip the shift: if shifting back does not reproduce LHS, bits were
// lost and the result must saturate.
9836 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9837 : MIRBuilder.buildLShr(Ty, Result, RHS);
9838
9839 MachineInstrBuilder SatVal;
9840 if (IsSigned) {
// Saturate toward the sign of the original LHS: negative -> MIN,
// non-negative -> MAX.
9841 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9842 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9843 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9844 MIRBuilder.buildConstant(Ty, 0));
9845 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9846 } else {
9847 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9848 }
9849 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9850 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9851
9852 MI.eraseFromParent();
9853 return Legalized;
9854}
9855
// NOTE(review): signature line (orig. 9856) lost in extraction; lowers a
// byte-swap (G_BSWAP style — confirm) with explicit shifts, masks and ORs.
9857 auto [Dst, Src] = MI.getFirst2Regs();
9858 const LLT Ty = MRI.getType(Src);
9859 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9860 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9861
9862 // Swap most and least significant byte, set remaining bytes in Res to zero.
9863 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9864 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9865 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9866 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9867
9868 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9869 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9870 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9871 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9872 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
// Each iteration handles a symmetric byte pair, so the shift distance
// shrinks by 16 bits (two bytes) per step.
9873 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9874 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9875 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9876 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9877 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9878 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9879 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9880 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9881 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9882 }
// Redirect the final OR to define Dst directly instead of a temp vreg.
9883 Res.getInstr()->getOperand(0).setReg(Dst);
9884
9885 MI.eraseFromParent();
9886 return Legalized;
9887}
9888
9889//{ (Src & Mask) >> N } | { (Src << N) & Mask }
// NOTE(review): the first signature line (orig. 9890, presumably
// "static MachineInstrBuilder SwapN(unsigned N, DstOp Dst,
// MachineIRBuilder &B,") was lost in extraction. Helper for bitreverse:
// swaps adjacent N-bit groups selected by Mask.
9891 MachineInstrBuilder Src, const APInt &Mask) {
9892 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9893 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9894 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9895 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9896 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9897 return B.buildOr(Dst, LHS, RHS);
9898}
9899
// NOTE(review): signature lines (orig. 9900-9901) lost in extraction; lowers
// G_BITREVERSE: for >= 8-bit types, bswap then swap nibbles/pairs/bits via
// SwapN; for sub-byte types, move each bit individually.
9902 auto [Dst, Src] = MI.getFirst2Regs();
9903 const LLT SrcTy = MRI.getType(Src);
9904 unsigned Size = SrcTy.getScalarSizeInBits();
9905 unsigned VSize = SrcTy.getSizeInBits();
9906
9907 if (Size >= 8) {
9908 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9909 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9910 {LLT::fixed_vector(VSize / 8, 8),
9911 LLT::fixed_vector(VSize / 8, 8)}}))) {
9912 // If bitreverse is legal for i8 vector of the same size, then cast
9913 // to i8 vector type.
9914 // e.g. v4s32 -> v16s8
9915 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9916 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9917 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9918 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9919 MIRBuilder.buildBitcast(Dst, RBIT);
9920 } else {
9921 MachineInstrBuilder BSWAP =
9922 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9923
9924 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9925 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9926 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9927 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9928 APInt::getSplat(Size, APInt(8, 0xF0)));
9929
9930 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9931 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
9932 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
9933 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9934 APInt::getSplat(Size, APInt(8, 0xCC)));
9935
9936 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9937 // 6|7
9938 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
9939 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
9940 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9941 }
9942 } else {
9943 // Expand bitreverse for types smaller than 8 bits.
// Bit J of the source moves to bit I of the result (I + J == Size - 1).
// (NOTE(review): the Tmp/Tmp2 declaration lines, orig. 9944-9946, were
// dropped by extraction.)
9945 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9947 if (I < J) {
9948 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9949 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9950 } else {
9951 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9952 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9953 }
9954
// Isolate the single moved bit before OR-ing it into the accumulator.
9955 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9956 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9957 if (I == 0)
9958 Tmp = Tmp2;
9959 else
9960 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9961 }
9962 MIRBuilder.buildCopy(Dst, Tmp);
9963 }
9964
9965 MI.eraseFromParent();
9966 return Legalized;
9967}
9968
// NOTE(review): signature lines (orig. 9969-9970) lost in extraction; lowers
// G_READ_REGISTER / G_WRITE_REGISTER to a copy from/to the named physical
// register, diagnosing unknown register names.
9971 MachineFunction &MF = MIRBuilder.getMF();
9972
9973 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
// Operand layout differs: read is (def, name-metadata); write is
// (name-metadata, value).
9974 int NameOpIdx = IsRead ? 1 : 0;
9975 int ValRegIndex = IsRead ? 0 : 1;
9976
9977 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9978 const LLT Ty = MRI.getType(ValReg);
9979 const MDString *RegStr = cast<MDString>(
9980 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9981
9982 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9983 if (!PhysReg) {
// Unknown register name: emit a diagnostic but keep the IR valid by
// defining the read's result as undef. (NOTE(review): the diagnose(...)
// call opening, orig. 9985, was dropped by extraction.)
9984 const Function &Fn = MF.getFunction();
9986 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9987 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9988 Fn, MI.getDebugLoc()));
9989 if (IsRead)
9990 MIRBuilder.buildUndef(ValReg);
9991
9992 MI.eraseFromParent();
9993 return Legalized;
9994 }
9995
9996 if (IsRead)
9997 MIRBuilder.buildCopy(ValReg, PhysReg);
9998 else
9999 MIRBuilder.buildCopy(PhysReg, ValReg);
10000
10001 MI.eraseFromParent();
10002 return Legalized;
10003}
10004
// NOTE(review): signature lines (orig. 10005-10006) lost in extraction;
// lowers G_SMULH / G_UMULH by extending both operands to double width,
// multiplying, and taking the high half via a shift + truncate.
10007 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
10008 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10009 Register Result = MI.getOperand(0).getReg();
10010 LLT OrigTy = MRI.getType(Result);
10011 auto SizeInBits = OrigTy.getScalarSizeInBits();
10012 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
10013
10014 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
10015 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
10016 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
// The shift kind matches the extension: arithmetic for signed, logical for
// unsigned.
10017 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10018
10019 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
10020 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
10021 MIRBuilder.buildTrunc(Result, Shifted);
10022
10023 MI.eraseFromParent();
10024 return Legalized;
10025}
10026
// Lower G_IS_FPCLASS: re-implement the floating-point class test with integer
// arithmetic on the raw bit pattern of the source value. Each requested class
// in the mask contributes one comparison, and the partial results are OR'd
// together into the destination.
// NOTE(review): the function's signature lines were dropped by the extraction
// this text came from — presumably LegalizerHelper::lowerISFPCLASS; confirm
// against upstream.
10029 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
10030 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
10031
// Trivial masks: the empty test is always false, the full test always true.
10032 if (Mask == fcNone) {
10033 MIRBuilder.buildConstant(DstReg, 0);
10034 MI.eraseFromParent();
10035 return Legalized;
10036 }
10037 if (Mask == fcAllFlags) {
10038 MIRBuilder.buildConstant(DstReg, 1);
10039 MI.eraseFromParent();
10040 return Legalized;
10041 }
10042
10043 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
10044 // version
10045
10046 unsigned BitSize = SrcTy.getScalarSizeInBits();
10047 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
10048
// Reinterpret the FP value as an integer of the same width so the class
// checks below can be done with integer compares.
10049 LLT IntTy = SrcTy.changeElementType(LLT::scalar(BitSize));
10050 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
10051
10052 // Various masks.
10053 APInt SignBit = APInt::getSignMask(BitSize);
10054 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
10055 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
10056 APInt ExpMask = Inf;
10057 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
10058 APInt QNaNBitMask =
10059 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
10060 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
10061
10062 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
10063 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
10064 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
10065 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
10066 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
10067
// Abs clears the sign bit; Sign is true exactly when the sign bit was set.
10068 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10069 auto Sign =
10070 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
10071
// Res accumulates the OR of all partial class checks; start at false.
10072 auto Res = MIRBuilder.buildConstant(DstTy, 0);
10073 // Clang doesn't support capture of structured bindings:
10074 LLT DstTyCopy = DstTy;
10075 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
10076 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10077 };
10078
10079 // Tests that involve more than one class should be processed first.
10080 if ((Mask & fcFinite) == fcFinite) {
10081 // finite(V) ==> abs(V) u< exp_mask
10082 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10083 ExpMaskC));
10084 Mask &= ~fcFinite;
10085 } else if ((Mask & fcFinite) == fcPosFinite) {
10086 // finite(V) && V > 0 ==> V u< exp_mask
10087 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
10088 ExpMaskC));
10089 Mask &= ~fcPosFinite;
10090 } else if ((Mask & fcFinite) == fcNegFinite) {
10091 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
10092 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
10093 ExpMaskC);
10094 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
10095 appendToRes(And);
10096 Mask &= ~fcNegFinite;
10097 }
10098
10099 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
10100 // fcZero | fcSubnormal => test all exponent bits are 0
10101 // TODO: Handle sign bit specific cases
10102 // TODO: Handle inverted case
10103 if (PartialCheck == (fcZero | fcSubnormal)) {
10104 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10105 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10106 ExpBits, ZeroC));
10107 Mask &= ~PartialCheck;
10108 }
10109 }
10110
10111 // Check for individual classes.
10112 if (FPClassTest PartialCheck = Mask & fcZero) {
10113 if (PartialCheck == fcPosZero)
10114 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10115 AsInt, ZeroC));
10116 else if (PartialCheck == fcZero)
10117 appendToRes(
10118 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
10119 else // fcNegZero
10120 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10121 AsInt, SignBitC));
10122 }
10123
10124 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
10125 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
10126 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
// The subtract-one trick makes zero wrap to all-ones, excluding it from the
// unsigned-less-than range check.
10127 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
10128 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
10129 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
10130 auto SubnormalRes =
10131 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
10132 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10133 if (PartialCheck == fcNegSubnormal)
10134 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10135 appendToRes(SubnormalRes);
10136 }
10137
10138 if (FPClassTest PartialCheck = Mask & fcInf) {
10139 if (PartialCheck == fcPosInf)
10140 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10141 AsInt, InfC));
10142 else if (PartialCheck == fcInf)
10143 appendToRes(
10144 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
10145 else { // fcNegInf
10146 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10147 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
10148 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
10149 AsInt, NegInfC));
10150 }
10151 }
10152
10153 if (FPClassTest PartialCheck = Mask & fcNan) {
10154 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10155 if (PartialCheck == fcNan) {
10156 // isnan(V) ==> abs(V) u> int(inf)
10157 appendToRes(
10158 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10159 } else if (PartialCheck == fcQNan) {
10160 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10161 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10162 InfWithQnanBitC));
10163 } else { // fcSNan
10164 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10165 // abs(V) u< (unsigned(Inf) | quiet_bit)
10166 auto IsNan =
10167 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10168 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10169 Abs, InfWithQnanBitC);
10170 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10171 }
10172 }
10173
10174 if (FPClassTest PartialCheck = Mask & fcNormal) {
10175 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10176 // (max_exp-1))
10177 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10178 auto ExpMinusOne = MIRBuilder.buildSub(
10179 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10180 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10181 auto NormalRes =
10182 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10183 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10184 if (PartialCheck == fcNegNormal)
10185 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10186 else if (PartialCheck == fcPosNormal) {
// XOR against an all-ones mask inverts the boolean sign test.
10187 auto PosSign = MIRBuilder.buildXor(
10188 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10189 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10190 }
10191 appendToRes(NormalRes);
10192 }
10193
10194 MIRBuilder.buildCopy(DstReg, Res);
10195 MI.eraseFromParent();
10196 return Legalized;
10197}
10198
// Lower G_SELECT to bitwise logic: res = (op1 & mask) | (op2 & ~mask), where
// the condition is first widened into a full-width, all-ones/all-zeros mask.
// NOTE(review): the function's signature lines were dropped by the extraction
// this text came from.
10200 // Implement G_SELECT in terms of XOR, AND, OR.
10201 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10202 MI.getFirst4RegLLTs();
10203
// Pointer (or vector-of-pointer) payloads are converted to integers so the
// AND/OR logic below is legal, and converted back at the end.
10204 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10205 if (IsEltPtr) {
10206 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10207 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10208 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10209 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10210 DstTy = NewTy;
10211 }
10212
10213 if (MaskTy.isScalar()) {
10214 // Turn the scalar condition into a vector condition mask if needed.
10215
10216 Register MaskElt = MaskReg;
10217
10218 // The condition was potentially zero extended before, but we want a sign
10219 // extended boolean.
10220 if (MaskTy != LLT::scalar(1))
10221 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10222
10223 // Continue the sign extension (or truncate) to match the data type.
10224 MaskElt =
10225 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10226
10227 if (DstTy.isVector()) {
10228 // Generate a vector splat idiom.
10229 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10230 MaskReg = ShufSplat.getReg(0);
10231 } else {
10232 MaskReg = MaskElt;
10233 }
10234 MaskTy = DstTy;
10235 } else if (!DstTy.isVector()) {
10236 // Cannot handle the case that mask is a vector and dst is a scalar.
10237 return UnableToLegalize;
10238 }
10239
// The bitwise expansion only works when mask and data are the same width.
10240 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10241 return UnableToLegalize;
10242 }
10243
10244 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10245 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10246 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10247 if (IsEltPtr) {
10248 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10249 MIRBuilder.buildIntToPtr(DstReg, Or);
10250 } else {
10251 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10252 }
10253 MI.eraseFromParent();
10254 return Legalized;
10255}
10256
10258 // Split DIVREM into individual instructions.
10259 unsigned Opcode = MI.getOpcode();
10260
10261 MIRBuilder.buildInstr(
10262 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10263 : TargetOpcode::G_UDIV,
10264 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10265 MIRBuilder.buildInstr(
10266 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10267 : TargetOpcode::G_UREM,
10268 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10269 MI.eraseFromParent();
10270 return Legalized;
10271}
10272
10275 // Expand %res = G_ABS %a into:
10276 // %v1 = G_ASHR %a, scalar_size-1
10277 // %v2 = G_ADD %a, %v1
10278 // %res = G_XOR %v2, %v1
10279 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10280 Register OpReg = MI.getOperand(1).getReg();
10281 auto ShiftAmt =
10282 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10283 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10284 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10285 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10286 MI.eraseFromParent();
10287 return Legalized;
10288}
10289
10292 // Expand %res = G_ABS %a into:
10293 // %v1 = G_CONSTANT 0
10294 // %v2 = G_SUB %v1, %a
10295 // %res = G_SMAX %a, %v2
10296 Register SrcReg = MI.getOperand(1).getReg();
10297 LLT Ty = MRI.getType(SrcReg);
10298 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10299 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10300 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10301 MI.eraseFromParent();
10302 return Legalized;
10303}
10304
10307 Register SrcReg = MI.getOperand(1).getReg();
10308 Register DestReg = MI.getOperand(0).getReg();
10309 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10310 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10311 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10312 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10313 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10314 MI.eraseFromParent();
10315 return Legalized;
10316}
10317
// Lower G_ABDS/G_ABDU (absolute difference) via compare + select of the two
// possible subtraction orders.
// NOTE(review): the signature lines and the two lines selecting the compare
// predicate (upstream uses ICMP_SGT for G_ABDS and ICMP_UGT for G_ABDU) were
// dropped by the extraction this text came from — confirm against upstream.
10320 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10321 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10322 "Expected G_ABDS or G_ABDU instruction");
10323
10324 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10325 LLT Ty = MRI.getType(LHS);
10326
10327 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10328 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10329 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10330 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10331 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10334 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10335 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10336
10337 MI.eraseFromParent();
10338 return Legalized;
10339}
10340
// Lower G_ABDS/G_ABDU (absolute difference) as max minus min, using the
// signed or unsigned min/max pair as appropriate for the opcode.
// NOTE(review): the function's signature lines were dropped by the extraction
// this text came from.
10343 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10344 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10345 "Expected G_ABDS or G_ABDU instruction");
10346
10347 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10348 LLT Ty = MRI.getType(LHS);
10349
10350 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10351 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10352 Register MaxReg, MinReg;
10353 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10354 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10355 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10356 } else {
10357 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10358 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10359 }
10360 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10361
10362 MI.eraseFromParent();
10363 return Legalized;
10364}
10365
10367 Register SrcReg = MI.getOperand(1).getReg();
10368 Register DstReg = MI.getOperand(0).getReg();
10369
10370 LLT Ty = MRI.getType(DstReg);
10371
10372 // Reset sign bit
10373 MIRBuilder.buildAnd(
10374 DstReg, SrcReg,
10375 MIRBuilder.buildConstant(
10376 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10377
10378 MI.eraseFromParent();
10379 return Legalized;
10380}
10381
10384 Register SrcReg = MI.getOperand(1).getReg();
10385 LLT SrcTy = MRI.getType(SrcReg);
10386 LLT DstTy = MRI.getType(SrcReg);
10387
10388 // The source could be a scalar if the IR type was <1 x sN>.
10389 if (SrcTy.isScalar()) {
10390 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10391 return UnableToLegalize; // FIXME: handle extension.
10392 // This can be just a plain copy.
10393 Observer.changingInstr(MI);
10394 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10395 Observer.changedInstr(MI);
10396 return Legalized;
10397 }
10398 return UnableToLegalize;
10399}
10400
// Lower G_VAARG: load the current argument pointer from the va_list, align it
// if the requested alignment exceeds the minimum stack argument alignment,
// load the argument, and store the bumped pointer back to the va_list.
// NOTE(review): the function's signature lines were dropped by the extraction
// this text came from.
10402 MachineFunction &MF = *MI.getMF();
10403 const DataLayout &DL = MIRBuilder.getDataLayout();
10404 LLVMContext &Ctx = MF.getFunction().getContext();
10405 Register ListPtr = MI.getOperand(1).getReg();
10406 LLT PtrTy = MRI.getType(ListPtr);
10407
10408 // LstPtr is a pointer to the head of the list. Get the address
10409 // of the head of the list.
10410 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10411 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10412 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10413 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10414
// Operand 2 carries the required alignment as an immediate. Round the list
// pointer up only when it exceeds what the stack already guarantees.
10415 const Align A(MI.getOperand(2).getImm());
10416 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10417 if (A > TLI.getMinStackArgumentAlignment()) {
10418 Register AlignAmt =
10419 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10420 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10421 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10422 VAList = AndDst.getReg(0);
10423 }
10424
10425 // Increment the pointer, VAList, to the next vaarg
10426 // The list should be bumped by the size of element in the current head of
10427 // list.
10428 Register Dst = MI.getOperand(0).getReg();
10429 LLT LLTTy = MRI.getType(Dst);
10430 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10431 auto IncAmt =
10432 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10433 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10434
10435 // Store the increment VAList to the legalized pointer
// NOTE(review): the line declaring StoreMMO (a MachineMemOperand built with
// MF.getMachineMemOperand) was dropped by the extraction — confirm against
// upstream.
10437 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10438 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10439 // Load the actual argument out of the pointer VAList
10440 Align EltAlignment = DL.getABITypeAlign(Ty);
10441 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10442 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10443 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10444
10445 MI.eraseFromParent();
10446 return Legalized;
10447}
10448
// Decide whether mem-function lowering should optimize for size.
// NOTE(review): the signature line and the conditional guarding the first
// return (upstream tests whether the target triple is Darwin) were dropped by
// the extraction this text came from — confirm against upstream.
10450 // On Darwin, -Os means optimize for size without hurting performance, so
10451 // only really optimize for size when -Oz (MinSize) is used.
10453 return MF.getFunction().hasMinSize();
10454 return MF.getFunction().hasOptSize();
10455}
10456
10457// Returns a list of types to use for memory op lowering in MemOps. A partial
10458// port of findOptimalMemOpLowering in TargetLowering.
// Returns false when no satisfying decomposition exists within Limit ops.
10459 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10460 unsigned Limit, const MemOp &Op,
10461 unsigned DstAS, unsigned SrcAS,
10462 const AttributeList &FuncAttributes,
10463 const TargetLowering &TLI) {
// A fixed destination alignment that exceeds the source alignment cannot be
// satisfied; bail out.
10464 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10465 return false;
10466
// Let the target pick a preferred type first; fall back to a scalar search.
10467 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10468
10469 if (Ty == LLT()) {
10470 // Use the largest scalar type whose alignment constraints are satisfied.
10471 // We only need to check DstAlign here as SrcAlign is always greater or
10472 // equal to DstAlign (or zero).
10473 Ty = LLT::scalar(64);
10474 if (Op.isFixedDstAlign())
10475 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10476 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10477 Ty = LLT::scalar(Ty.getSizeInBytes())
10478 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10479 // FIXME: check for the largest legal type we can load/store to.
10480 }
10481
// Greedily cover the remaining size, shrinking the access type (or allowing
// an overlapping access) whenever the current type overshoots.
10482 unsigned NumMemOps = 0;
10483 uint64_t Size = Op.size();
10484 while (Size) {
10485 unsigned TySize = Ty.getSizeInBytes();
10486 while (TySize > Size) {
10487 // For now, only use non-vector load / store's for the left-over pieces.
10488 LLT NewTy = Ty;
10489 // FIXME: check for mem op safety and legality of the types. Not all of
10490 // SDAGisms map cleanly to GISel concepts.
10491 if (NewTy.isVector())
10492 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10493 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10494 unsigned NewTySize = NewTy.getSizeInBytes();
10495 assert(NewTySize > 0 && "Could not find appropriate type");
10496
10497 // If the new LLT cannot cover all of the remaining bits, then consider
10498 // issuing a (or a pair of) unaligned and overlapping load / store.
10499 unsigned Fast;
10500 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10501 MVT VT = getMVTForLLT(Ty);
// NOTE(review): two lines of this condition (upstream calls
// TLI.allowsMisalignedMemoryAccesses(...) here) were dropped by the
// extraction this text came from — confirm against upstream.
10502 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10504 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10506 Fast)
10507 TySize = Size;
10508 else {
10509 Ty = NewTy;
10510 TySize = NewTySize;
10511 }
10512 }
10513
10514 if (++NumMemOps > Limit)
10515 return false;
10516
10517 MemOps.push_back(Ty);
10518 Size -= TySize;
10519 }
10520
10521 return true;
10522}
10523
10524// Get a vectorized representation of the memset value operand, GISel edition.
10526 MachineRegisterInfo &MRI = *MIB.getMRI();
10527 unsigned NumBits = Ty.getScalarSizeInBits();
10528 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10529 if (!Ty.isVector() && ValVRegAndVal) {
10530 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10531 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10532 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10533 }
10534
10535 // Extend the byte value to the larger type, and then multiply by a magic
10536 // value 0x010101... in order to replicate it across every byte.
10537 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10538 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10539 return MIB.buildConstant(Ty, 0).getReg(0);
10540 }
10541
10542 LLT ExtType = Ty.getScalarType();
10543 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10544 if (NumBits > 8) {
10545 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10546 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10547 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10548 }
10549
10550 // For vector types create a G_BUILD_VECTOR.
10551 if (Ty.isVector())
10552 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10553
10554 return Val;
10555}
10556
// Lower G_MEMSET of a known length into a sequence of stores of a splatted
// value, using store types chosen by findGISelOptimalMemOpLowering.
// NOTE(review): the LegalizeResult return-type line preceding this signature
// was dropped by the extraction this text came from.
10558 LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10559 uint64_t KnownLen, Align Alignment,
10560 bool IsVolatile) {
10561 auto &MF = *MI.getParent()->getParent();
10562 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10563 auto &DL = MF.getDataLayout();
10564 LLVMContext &C = MF.getFunction().getContext();
10565
10566 assert(KnownLen != 0 && "Have a zero length memset length!");
10567
// If the destination is a non-fixed stack object we are allowed to raise its
// alignment to whatever the chosen store type prefers.
10568 bool DstAlignCanChange = false;
10569 MachineFrameInfo &MFI = MF.getFrameInfo();
10570 bool OptSize = shouldLowerMemFuncForSize(MF);
10571
10572 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10573 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10574 DstAlignCanChange = true;
10575
10576 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10577 std::vector<LLT> MemOps;
10578
10579 const auto &DstMMO = **MI.memoperands_begin();
10580 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10581
10582 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10583 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10584
10585 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10586 MemOp::Set(KnownLen, DstAlignCanChange,
10587 Alignment,
10588 /*IsZeroMemset=*/IsZeroVal,
10589 /*IsVolatile=*/IsVolatile),
10590 DstPtrInfo.getAddrSpace(), ~0u,
10591 MF.getFunction().getAttributes(), TLI))
10592 return UnableToLegalize;
10593
10594 if (DstAlignCanChange) {
10595 // Get an estimate of the type from the LLT.
10596 Type *IRTy = getTypeForLLT(MemOps[0], C);
10597 Align NewAlign = DL.getABITypeAlign(IRTy);
10598 if (NewAlign > Alignment) {
10599 Alignment = NewAlign;
10600 unsigned FI = FIDef->getOperand(1).getIndex();
10601 // Give the stack frame object a larger alignment if needed.
10602 if (MFI.getObjectAlign(FI) < Alignment)
10603 MFI.setObjectAlignment(FI, Alignment);
10604 }
10605 }
10606
10607 MachineIRBuilder MIB(MI);
10608 // Find the largest store and generate the bit pattern for it.
10609 LLT LargestTy = MemOps[0];
10610 for (unsigned i = 1; i < MemOps.size(); i++)
10611 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10612 LargestTy = MemOps[i];
10613
10614 // The memset stored value is always defined as an s8, so in order to make it
10615 // work with larger store types we need to repeat the bit pattern across the
10616 // wider type.
10617 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10618
10619 if (!MemSetValue)
10620 return UnableToLegalize;
10621
10622 // Generate the stores. For each store type in the list, we generate the
10623 // matching store of that type to the destination address.
10624 LLT PtrTy = MRI.getType(Dst);
10625 unsigned DstOff = 0;
10626 unsigned Size = KnownLen;
10627 for (unsigned I = 0; I < MemOps.size(); I++) {
10628 LLT Ty = MemOps[I];
10629 unsigned TySize = Ty.getSizeInBytes();
10630 if (TySize > Size) {
10631 // Issuing an unaligned load / store pair that overlaps with the previous
10632 // pair. Adjust the offset accordingly.
10633 assert(I == MemOps.size() - 1 && I != 0);
10634 DstOff -= TySize - Size;
10635 }
10636
10637 // If this store is smaller than the largest store see whether we can get
10638 // the smaller value for free with a truncate.
10639 Register Value = MemSetValue;
10640 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10641 MVT VT = getMVTForLLT(Ty);
10642 MVT LargestVT = getMVTForLLT(LargestTy);
10643 if (!LargestTy.isVector() && !Ty.isVector() &&
10644 TLI.isTruncateFree(LargestVT, VT))
10645 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10646 else
10647 Value = getMemsetValue(Val, Ty, MIB);
10648 if (!Value)
10649 return UnableToLegalize;
10650 }
10651
10652 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10653
10654 Register Ptr = Dst;
10655 if (DstOff != 0) {
10656 auto Offset =
10657 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10658 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10659 }
10660
10661 MIB.buildStore(Value, Ptr, *StoreMMO);
10662 DstOff += Ty.getSizeInBytes();
10663 Size -= TySize;
10664 }
10665
10666 MI.eraseFromParent();
10667 return Legalized;
10668}
10669
// Lower G_MEMCPY_INLINE by extracting the constant length and alignments from
// the instruction and delegating to the explicit-argument overload below.
// NOTE(review): the LegalizeResult return-type line preceding this signature
// was dropped by the extraction this text came from.
10671 LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10672 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10673
10674 auto [Dst, Src, Len] = MI.getFirst3Regs();
10675
10676 const auto *MMOIt = MI.memoperands_begin();
10677 const MachineMemOperand *MemOp = *MMOIt;
10678 bool IsVolatile = MemOp->isVolatile();
10679
10680 // See if this is a constant length copy
10681 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10682 // FIXME: support dynamically sized G_MEMCPY_INLINE
10683 assert(LenVRegAndVal &&
10684 "inline memcpy with dynamic size is not yet supported");
10685 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
// A zero-length copy is a no-op: just delete the instruction.
10686 if (KnownLen == 0) {
10687 MI.eraseFromParent();
10688 return Legalized;
10689 }
10690
// The first MMO describes the destination, the second the source.
10691 const auto &DstMMO = **MI.memoperands_begin();
10692 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10693 Align DstAlign = DstMMO.getBaseAlign();
10694 Align SrcAlign = SrcMMO.getBaseAlign();
10695
10696 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10697 IsVolatile);
10698}
10699
10701LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10702 uint64_t KnownLen, Align DstAlign,
10703 Align SrcAlign, bool IsVolatile) {
10704 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10705 return lowerMemcpy(MI, Dst, Src, KnownLen,
10706 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10707 IsVolatile);
10708}
10709
// Lower G_MEMCPY of a known length into an interleaved sequence of loads and
// stores, with types chosen by findGISelOptimalMemOpLowering and capped at
// Limit operations.
// NOTE(review): the LegalizeResult return-type line preceding this signature
// was dropped by the extraction this text came from.
10711 LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10712 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10713 Align SrcAlign, bool IsVolatile) {
10714 auto &MF = *MI.getParent()->getParent();
10715 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10716 auto &DL = MF.getDataLayout();
// NOTE(review): the line declaring the LLVMContext `C` used below was dropped
// by the extraction — confirm against upstream.
10718
10719 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10720
10721 bool DstAlignCanChange = false;
10722 MachineFrameInfo &MFI = MF.getFrameInfo();
10723 Align Alignment = std::min(DstAlign, SrcAlign);
10724
// A non-fixed stack destination may have its alignment raised below.
10725 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10726 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10727 DstAlignCanChange = true;
10728
10729 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10730 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10731 // if the memcpy is in a tail call position.
10732
10733 std::vector<LLT> MemOps;
10734
10735 const auto &DstMMO = **MI.memoperands_begin();
10736 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10737 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10738 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
// NOTE(review): the line opening this call (upstream reads
// `if (!findGISelOptimalMemOpLowering(`) was dropped by the extraction —
// confirm against upstream.
10740
10741 MemOps, Limit,
10742 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10743 IsVolatile),
10744 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10745 MF.getFunction().getAttributes(), TLI))
10746 return UnableToLegalize;
10747
10748 if (DstAlignCanChange) {
10749 // Get an estimate of the type from the LLT.
10750 Type *IRTy = getTypeForLLT(MemOps[0], C);
10751 Align NewAlign = DL.getABITypeAlign(IRTy);
10752
10753 // Don't promote to an alignment that would require dynamic stack
10754 // realignment.
// NOTE(review): the line declaring TRI (the TargetRegisterInfo from the
// subtarget) was dropped by the extraction — confirm against upstream.
10756 if (!TRI->hasStackRealignment(MF))
10757 if (MaybeAlign StackAlign = DL.getStackAlignment())
10758 NewAlign = std::min(NewAlign, *StackAlign);
10759
10760 if (NewAlign > Alignment) {
10761 Alignment = NewAlign;
10762 unsigned FI = FIDef->getOperand(1).getIndex();
10763 // Give the stack frame object a larger alignment if needed.
10764 if (MFI.getObjectAlign(FI) < Alignment)
10765 MFI.setObjectAlignment(FI, Alignment);
10766 }
10767 }
10768
10769 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10770
10771 MachineIRBuilder MIB(MI);
10772 // Now we need to emit a pair of load and stores for each of the types we've
10773 // collected. I.e. for each type, generate a load from the source pointer of
10774 // that type width, and then generate a corresponding store to the dest buffer
10775 // of that value loaded. This can result in a sequence of loads and stores
10776 // mixed types, depending on what the target specifies as good types to use.
10777 unsigned CurrOffset = 0;
10778 unsigned Size = KnownLen;
10779 for (auto CopyTy : MemOps) {
10780 // Issuing an unaligned load / store pair that overlaps with the previous
10781 // pair. Adjust the offset accordingly.
10782 if (CopyTy.getSizeInBytes() > Size)
10783 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10784
10785 // Construct MMOs for the accesses.
10786 auto *LoadMMO =
10787 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10788 auto *StoreMMO =
10789 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10790
10791 // Create the load.
10792 Register LoadPtr = Src;
// NOTE(review): the line declaring the shared `Offset` register (reused for
// the store address below) was dropped by the extraction.
10794 if (CurrOffset != 0) {
10795 LLT SrcTy = MRI.getType(Src);
10796 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10797 .getReg(0);
10798 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10799 }
10800 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10801
10802 // Create the store.
10803 Register StorePtr = Dst;
10804 if (CurrOffset != 0) {
10805 LLT DstTy = MRI.getType(Dst);
10806 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10807 }
10808 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10809 CurrOffset += CopyTy.getSizeInBytes();
10810 Size -= CopyTy.getSizeInBytes();
10811 }
10812
10813 MI.eraseFromParent();
10814 return Legalized;
10815}
10816
// Lower G_MEMMOVE of a known length. Unlike memcpy, all loads are issued
// before any store so that overlapping source/destination ranges read the
// original data.
// NOTE(review): the LegalizeResult return-type line preceding this signature
// was dropped by the extraction this text came from.
10818 LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10819 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10820 bool IsVolatile) {
10821 auto &MF = *MI.getParent()->getParent();
10822 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10823 auto &DL = MF.getDataLayout();
10824 LLVMContext &C = MF.getFunction().getContext();
10825
10826 assert(KnownLen != 0 && "Have a zero length memmove length!");
10827
10828 bool DstAlignCanChange = false;
10829 MachineFrameInfo &MFI = MF.getFrameInfo();
10830 bool OptSize = shouldLowerMemFuncForSize(MF);
10831 Align Alignment = std::min(DstAlign, SrcAlign);
10832
10833 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10834 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10835 DstAlignCanChange = true;
10836
10837 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10838 std::vector<LLT> MemOps;
10839
10840 const auto &DstMMO = **MI.memoperands_begin();
10841 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10842 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10843 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10844
10845 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10846 // to a bug in it's findOptimalMemOpLowering implementation. For now do the
10847 // same thing here.
// NOTE(review): the line opening this call (upstream reads
// `if (!findGISelOptimalMemOpLowering(`) was dropped by the extraction —
// confirm against upstream.
10849 MemOps, Limit,
10850 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10851 /*IsVolatile*/ true),
10852 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10853 MF.getFunction().getAttributes(), TLI))
10854 return UnableToLegalize;
10855
10856 if (DstAlignCanChange) {
10857 // Get an estimate of the type from the LLT.
10858 Type *IRTy = getTypeForLLT(MemOps[0], C);
10859 Align NewAlign = DL.getABITypeAlign(IRTy);
10860
10861 // Don't promote to an alignment that would require dynamic stack
10862 // realignment.
10863 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10864 if (!TRI->hasStackRealignment(MF))
10865 if (MaybeAlign StackAlign = DL.getStackAlignment())
10866 NewAlign = std::min(NewAlign, *StackAlign);
10867
10868 if (NewAlign > Alignment) {
10869 Alignment = NewAlign;
10870 unsigned FI = FIDef->getOperand(1).getIndex();
10871 // Give the stack frame object a larger alignment if needed.
10872 if (MFI.getObjectAlign(FI) < Alignment)
10873 MFI.setObjectAlignment(FI, Alignment);
10874 }
10875 }
10876
10877 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10878
10879 MachineIRBuilder MIB(MI);
10880 // Memmove requires that we perform the loads first before issuing the stores.
10881 // Apart from that, this loop is pretty much doing the same thing as the
10882 // memcpy codegen function.
10883 unsigned CurrOffset = 0;
10884 SmallVector<Register, 16> LoadVals;
10885 for (auto CopyTy : MemOps) {
10886 // Construct MMO for the load.
10887 auto *LoadMMO =
10888 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10889
10890 // Create the load.
10891 Register LoadPtr = Src;
10892 if (CurrOffset != 0) {
10893 LLT SrcTy = MRI.getType(Src);
10894 auto Offset =
10895 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10896 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10897 }
10898 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10899 CurrOffset += CopyTy.getSizeInBytes();
10900 }
10901
// Second pass: store the previously loaded values at matching offsets.
10902 CurrOffset = 0;
10903 for (unsigned I = 0; I < MemOps.size(); ++I) {
10904 LLT CopyTy = MemOps[I];
10905 // Now store the values loaded.
10906 auto *StoreMMO =
10907 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10908
10909 Register StorePtr = Dst;
10910 if (CurrOffset != 0) {
10911 LLT DstTy = MRI.getType(Dst);
10912 auto Offset =
10913 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10914 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10915 }
10916 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10917 CurrOffset += CopyTy.getSizeInBytes();
10918 }
10919 MI.eraseFromParent();
10920 return Legalized;
10921}
10922
// Dispatch lowering for G_MEMCPY / G_MEMMOVE / G_MEMSET with a known constant
// length: extract length and alignments, then forward to the matching
// lowering routine. MaxLen (when nonzero) bounds the lengths we will inline.
// NOTE(review): the function's signature lines were dropped by the extraction
// this text came from.
10925 const unsigned Opc = MI.getOpcode();
10926 // This combine is fairly complex so it's not written with a separate
10927 // matcher function.
10928 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10929 Opc == TargetOpcode::G_MEMSET) &&
10930 "Expected memcpy like instruction");
10931
10932 auto MMOIt = MI.memoperands_begin();
10933 const MachineMemOperand *MemOp = *MMOIt;
10934
10935 Align DstAlign = MemOp->getBaseAlign();
10936 Align SrcAlign;
10937 auto [Dst, Src, Len] = MI.getFirst3Regs();
10938
// Memset has only a destination MMO; the copy/move cases carry a second MMO
// describing the source.
10939 if (Opc != TargetOpcode::G_MEMSET) {
10940 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10941 MemOp = *(++MMOIt);
10942 SrcAlign = MemOp->getBaseAlign();
10943 }
10944
10945 // See if this is a constant length copy
10946 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10947 if (!LenVRegAndVal)
10948 return UnableToLegalize;
10949 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10950
// A zero-length operation is a no-op.
10951 if (KnownLen == 0) {
10952 MI.eraseFromParent();
10953 return Legalized;
10954 }
10955
10956 if (MaxLen && KnownLen > MaxLen)
10957 return UnableToLegalize;
10958
10959 bool IsVolatile = MemOp->isVolatile();
10960 if (Opc == TargetOpcode::G_MEMCPY) {
10961 auto &MF = *MI.getParent()->getParent();
10962 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10963 bool OptSize = shouldLowerMemFuncForSize(MF);
10964 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10965 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10966 IsVolatile);
10967 }
10968 if (Opc == TargetOpcode::G_MEMMOVE)
10969 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10970 if (Opc == TargetOpcode::G_MEMSET)
10971 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10972 return UnableToLegalize;
10973}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetReg at OffsetBits, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1402
APInt bitcastToAPInt() const
Definition APFloat.h:1408
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1193
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1153
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1164
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:956
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1697
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1483
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1016
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:216
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
constexpr LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:646
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:295
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:291
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:294
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2036
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:652
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:293
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1565
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1622
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1189
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:507
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1885
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:347
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1277
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:345
@ Custom
The result values require a custom uniformity check.
Definition Uniformity.h:31
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:610
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.