//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include <cmath>
#include <optional>
#include <tuple>

#define DEBUG_TYPE "gi-combiner"

using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));

CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, bool IsPreLegalize,
                               GISelValueTracking *VT,
                               MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
      MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
      TII(Builder.getMF().getSubtarget().getInstrInfo()),
      RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
      TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
  (void)this->VT;
}

const TargetLowering &CombinerHelper::getTargetLowering() const {
  return *Builder.getMF().getSubtarget().getTargetLowering();
}

MachineFunction &CombinerHelper::getMachineFunction() const {
  return Builder.getMF();
}

const DataLayout &CombinerHelper::getDataLayout() const {
  return getMachineFunction().getDataLayout();
}

LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }

/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return I;
}

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(V);
  auto Ctlz = MIB.buildCTLZ(Ty, V);
  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
}
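
// Illustrative sketch (not part of the original source): for a power-of-two
// input such as %v:_(s32) = G_CONSTANT i32 8, the emitted sequence reduces to
// the exponent:
//   %ctlz:_(s32) = G_CTLZ %v               ; ctlz(8) = 28
//   %c31:_(s32)  = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %c31, %ctlz       ; 31 - 28 = 3 == log2(8)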

/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}

/// Given a map from byte offsets in memory to indices in a load/store,
/// determine if that map corresponds to a little or big endian byte pattern.
///
/// \param MemOffset2Idx maps memory offsets to address offsets.
/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
///
/// \returns true if the map corresponds to a big endian byte pattern, false if
/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
///
/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
/// are as follows:
///
/// AddrOffset   Little endian    Big endian
/// 0            0                3
/// 1            1                2
/// 2            2                1
/// 3            3                0
static std::optional<bool>
isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
            int64_t LowestIdx) {
  // Need at least two byte positions to decide on endianness.
  unsigned Width = MemOffset2Idx.size();
  if (Width < 2)
    return std::nullopt;
  bool BigEndian = true, LittleEndian = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
    auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
    if (MemOffsetAndIdx == MemOffset2Idx.end())
      return std::nullopt;
    const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
    assert(Idx >= 0 && "Expected non-negative byte offset?");
    LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
    BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
    if (!BigEndian && !LittleEndian)
      return std::nullopt;
  }

  assert((BigEndian != LittleEndian) &&
         "Pattern cannot be both big and little endian!");
  return BigEndian;
}
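
// Illustrative usage (hypothetical values, not from the original source): for
// a 4-byte wide pattern, MemOffset2Idx = {0->0, 1->1, 2->2, 3->3} with
// LowestIdx == 0 matches littleEndianByteAt at every offset, so this returns
// false; the reversed map {0->3, 1->2, 2->1, 3->0} returns true; a map such as
// {0->2, 1->0, ...} matches neither pattern and yields std::nullopt.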

bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }

bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
  assert(LI && "Must have LegalizerInfo to query isLegal!");
  return LI->getAction(Query).Action == LegalizeActions::Legal;
}

bool CombinerHelper::isLegalOrBeforeLegalizer(
    const LegalityQuery &Query) const {
  return isPreLegalize() || isLegal(Query);
}

bool CombinerHelper::isLegalOrHasWidenScalar(const LegalityQuery &Query) const {
  return isLegal(Query) ||
         LI->getAction(Query).Action == LegalizeActions::WidenScalar;
}

bool CombinerHelper::isLegalOrHasLibcall(
    const LegalityQuery &Query) const {
  LegalizeAction Action = LI->getAction(Query).Action;
  return Action == LegalizeActions::Legal ||
         Action == LegalizeActions::Libcall;
}

bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
  if (!Ty.isVector())
    return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
  // Vector constants are represented as a G_BUILD_VECTOR of scalar
  // G_CONSTANTs.
  if (isPreLegalize())
    return true;
  LLT EltTy = Ty.getElementType();
  return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
         isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}

void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(FromReg, ToReg);

  Observer.finishedChangingAllUsesOfReg();
}

void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());

  FromRegOp.setReg(ToReg);

  Observer.changedInstr(*FromRegOp.getParent());
}

void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
                                       unsigned ToOpcode) const {
  Observer.changingInstr(FromMI);

  FromMI.setDesc(Builder.getTII().get(ToOpcode));

  Observer.changedInstr(FromMI);
}

const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
  return RBI->getRegBank(Reg, MRI, *TRI);
}

void CombinerHelper::setRegBank(Register Reg,
                                const RegisterBank *RegBank) const {
  if (RegBank)
    MRI.setRegBank(Reg, *RegBank);
}

bool CombinerHelper::tryCombineCopy(MachineInstr &MI) const {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineCopy(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}

void CombinerHelper::applyCombineCopy(MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  replaceRegWith(MRI, DstReg, SrcReg);
  MI.eraseFromParent();
}

bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
    MachineInstr &MI, BuildFnTy &MatchInfo) const {
  // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
  Register DstOp = MI.getOperand(0).getReg();
  Register OrigOp = MI.getOperand(1).getReg();

  if (!MRI.hasOneNonDBGUse(OrigOp))
    return false;

  MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
  // Even if only a single operand of the PHI is not guaranteed non-poison,
  // moving freeze() backwards across a PHI can cause optimization issues for
  // other users of that operand.
  //
  // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
  // the source register is unprofitable because it makes the freeze() more
  // strict than is necessary (it would affect the whole register instead of
  // just the subreg being frozen).
  if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
    return false;

  if (canCreateUndefOrPoison(OrigOp, MRI,
                             /*ConsiderFlagsAndMetadata=*/false))
    return false;

  std::optional<MachineOperand> MaybePoisonOperand;
  for (MachineOperand &Operand : OrigDef->uses()) {
    if (!Operand.isReg())
      return false;

    if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
      continue;

    if (!MaybePoisonOperand)
      MaybePoisonOperand = Operand;
    else {
      // We have more than one maybe-poison operand. Moving the freeze is
      // unsafe.
      return false;
    }
  }

  // Eliminate freeze if all operands are guaranteed non-poison.
  if (!MaybePoisonOperand) {
    MatchInfo = [=](MachineIRBuilder &B) {
      Observer.changingInstr(*OrigDef);
      cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
      Observer.changedInstr(*OrigDef);
      B.buildCopy(DstOp, OrigOp);
    };
    return true;
  }

  Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
  LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);

  MatchInfo = [=](MachineIRBuilder &B) mutable {
    Observer.changingInstr(*OrigDef);
    cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
    Observer.changedInstr(*OrigDef);
    B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
    auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
    replaceRegOpWith(
        MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
        Freeze.getReg(0));
    replaceRegWith(MRI, DstOp, OrigOp);
  };
  return true;
}
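
// Illustrative MIR sketch of the transform above (hypothetical registers, not
// from the original source): with %b as the single maybe-poison operand,
//   %x:_(s32) = G_ADD %a, %b
//   %f:_(s32) = G_FREEZE %x
// becomes
//   %fb:_(s32) = G_FREEZE %b
//   %x:_(s32) = G_ADD %a, %fb     ; poison-generating flags dropped
// and %f's uses are rewired to %x.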

bool CombinerHelper::matchCombineConcatVectors(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  bool IsUndef = true;
  MachineInstr *Undef = nullptr;

  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
  for (const MachineOperand &MO : MI.uses()) {
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    if (!MRI.hasOneNonDBGUse(Reg))
      return false;
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector into as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }

  // Check if the combine is illegal.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
    return false;
  }

  if (IsUndef)
    Ops.clear();

  return true;
}

void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in matchCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (Ops.empty())
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  replaceRegWith(MRI, DstReg, NewDstReg);
  MI.eraseFromParent();
}
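
// Illustrative MIR sketch (hypothetical registers, not from the original
// source):
//   %v1:_(<2 x s32>) = G_BUILD_VECTOR %a, %b
//   %v2:_(<2 x s32>) = G_BUILD_VECTOR %c, %d
//   %cat:_(<4 x s32>) = G_CONCAT_VECTORS %v1, %v2
// flattens to
//   %cat:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d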

void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {
  auto &Shuffle = cast<GShuffleVector>(MI);

  Register SrcVec1 = Shuffle.getSrc1Reg();
  Register SrcVec2 = Shuffle.getSrc2Reg();
  LLT EltTy = MRI.getType(SrcVec1).getElementType();
  int Width = MRI.getType(SrcVec1).getNumElements();

  auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
  auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);

  SmallVector<Register> Extracts;
  // Select only applicable elements from unmerged values.
  for (int Val : Shuffle.getMask()) {
    if (Val == -1)
      Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
    else if (Val < Width)
      Extracts.push_back(Unmerge1.getReg(Val));
    else
      Extracts.push_back(Unmerge2.getReg(Val - Width));
  }
  assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
  if (Extracts.size() == 1)
    Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
  else
    Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
  MI.eraseFromParent();
}
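
// Illustrative MIR sketch (hypothetical registers, not from the original
// source): a mask of (1, -1, 2) over two <2 x s32> sources becomes
//   %e0:_(s32), %e1:_(s32) = G_UNMERGE_VALUES %src1
//   %e2:_(s32), %e3:_(s32) = G_UNMERGE_VALUES %src2
//   %u:_(s32) = G_IMPLICIT_DEF
//   %dst:_(<3 x s32>) = G_BUILD_VECTOR %e1, %u, %e2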

bool CombinerHelper::matchCombineShuffleConcat(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  auto ConcatMI1 =
      dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
  auto ConcatMI2 =
      dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
  if (!ConcatMI1 || !ConcatMI2)
    return false;

  // Check that the sources of the concat instructions have the same type.
  if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
      MRI.getType(ConcatMI2->getSourceReg(0)))
    return false;

  LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
  LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
  unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
  for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
    // Check if the index takes a whole source register from G_CONCAT_VECTORS.
    // Assumes that all sources of G_CONCAT_VECTORS are the same type.
    if (Mask[i] == -1) {
      for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
        if (i + j >= Mask.size())
          return false;
        if (Mask[i + j] != -1)
          return false;
      }
      if (!isLegalOrBeforeLegalizer(
              {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
        return false;
      Ops.push_back(0);
    } else if (Mask[i] % ConcatSrcNumElt == 0) {
      for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
        if (i + j >= Mask.size())
          return false;
        if (Mask[i + j] != Mask[i] + static_cast<int>(j))
          return false;
      }
      // Retrieve the source register from its respective G_CONCAT_VECTORS
      // instruction.
      if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
        Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
      } else {
        Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
                                              ConcatMI1->getNumSources()));
      }
    } else {
      return false;
    }
  }

  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_CONCAT_VECTORS,
           {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
    return false;

  return !Ops.empty();
}

void CombinerHelper::applyCombineShuffleConcat(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  LLT SrcTy;
  for (Register &Reg : Ops) {
    if (Reg != 0)
      SrcTy = MRI.getType(Reg);
  }
  assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");

  Register UndefReg = 0;

  for (Register &Reg : Ops) {
    if (Reg == 0) {
      if (UndefReg == 0)
        UndefReg = Builder.buildUndef(SrcTy).getReg(0);
      Reg = UndefReg;
    }
  }

  if (Ops.size() > 1)
    Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
  else
    Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
  MI.eraseFromParent();
}
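
// Illustrative MIR sketch (hypothetical registers, not from the original
// source): when the shuffle mask selects whole concat sources,
//   %c1:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
//   %c2:_(<4 x s32>) = G_CONCAT_VECTORS %c, %d
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %c1, %c2, shufflemask(2,3,4,5)
// becomes
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %b, %c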

bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) const {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineShuffleVector(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);

  unsigned DstNumElts = DstType.getNumElements();
  unsigned SrcNumElts = SrcType.getNumElements();

  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  // extract_vector_elt and so on. It is less clear that would
  // be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter; we will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  // we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}
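
// Illustrative MIR sketch (hypothetical registers, not from the original
// source): with two <2 x s32> sources, the identity-concat mask
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1, %v2, shufflemask(0,1,2,3)
// is rebuilt by the apply below (via buildMergeLikeInstr) as
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %v1, %v2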

void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               ArrayRef<Register> Ops) const {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMergeLikeInstr(NewDstReg, Ops);

  replaceRegWith(MRI, DstReg, NewDstReg);
  MI.eraseFromParent();
}

namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while the *ForCandidate arguments describe the candidate under
/// consideration.
PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
                                  PreferredTuple &CurrentUse,
                                  const LLT TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive. Don't do this if the load is already a zero-extend load
  // though, otherwise we'll rewrite a zero-extend load into a sign-extend
  // later.
  if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}

/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHIs is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases
/// may want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just
  // after the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }

  // Otherwise we want the start of the BB.
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace

bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) const {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}

static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
  unsigned CandidateLoadOpc;
  switch (ExtOpc) {
  case TargetOpcode::G_ANYEXT:
    CandidateLoadOpc = TargetOpcode::G_LOAD;
    break;
  case TargetOpcode::G_SEXT:
    CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXT:
    CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
    break;
  default:
    llvm_unreachable("Unexpected extend opc");
  }
  return CandidateLoadOpc;
}

bool CombinerHelper::matchCombineExtendingLoads(
    MachineInstr &MI, PreferredTuple &Preferred) const {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or
  // just for performance.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
  if (!LoadMI)
    return false;

  Register LoadReg = LoadMI->getDstReg();

  LLT LoadValueTy = MRI.getType(LoadReg);
  if (!LoadValueTy.isScalar())
    return false;

  // Most architectures are going to legalize narrower-than-s8 loads into at
  // least a 1 byte load, and the MMOs can only describe memory accesses in
  // multiples of bytes. If we try to perform extload combining on those, we
  // can end up with
  //   %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;

  // Non-power-of-2 types will very likely be legalized into multiple loads.
  // Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode =
      isa<GLoad>(&MI)
          ? TargetOpcode::G_ANYEXT
          : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      const auto &MMO = LoadMI->getMMO();
      // Don't do anything for atomics.
      if (MMO.isAtomic())
        continue;
      // Check for legality.
      if (!isPreLegalize()) {
        LegalityQuery::MemDesc MMDesc(MMO);
        unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
        if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
                .Action != LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(MI, Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}

void CombinerHelper::applyCombineExtendingLoads(
    MachineInstr &MI, PreferredTuple &Preferred) const {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
  MI.setDesc(Builder.getTII().get(LoadOpc));

  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses(
      llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should
    // fix up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s32) = G_SEXT %1(s8)
          //   %3:_(s32) = G_ANYEXT %1(s8)
          //   ... = ... %3(s32)
          // rewrites to:
          //   %2:_(s32) = G_SEXTLOAD ...
          //   ... = ... %2(s32)
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s32) = G_SEXT %1(s8)
          //   %3:_(s64) = G_ANYEXT %1(s8)
          //   ... = ... %3(s64)
          // rewrites to:
          //   %2:_(s32) = G_SEXTLOAD ...
          //   %3:_(s64) = G_ANYEXT %2:_(s32)
          //   ... = ... %3(s64)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is large, then insert a truncate. For
          // example:
          //   %1:_(s8) = G_LOAD ...
          //   %2:_(s64) = G_SEXT %1(s8)
          //   %3:_(s32) = G_ZEXT %1(s8)
          //   ... = ... %3(s32)
          // rewrites to:
          //   %2:_(s64) = G_SEXTLOAD ...
          //   %4:_(s8) = G_TRUNC %2:_(s64)
          //   %3:_(s32) = G_ZEXT %4:_(s8)
          //   ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally
    // loaded. This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}

bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  // If we have the following code:
  //   %mask = G_CONSTANT 255
  //   %ld = G_LOAD %ptr, (load s16)
  //   %and = G_AND %ld, %mask
  //
  // Try to fold it into
  //   %ld = G_ZEXTLOAD %ptr, (load s8)

  Register Dst = MI.getOperand(0).getReg();
  if (MRI.getType(Dst).isVector())
    return false;

  auto MaybeMask =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeMask)
    return false;

  APInt MaskVal = MaybeMask->Value;

  if (!MaskVal.isMask())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  // Don't use getOpcodeDef() here since intermediate instructions may have
  // multiple users.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
    return false;

  Register LoadReg = LoadMI->getDstReg();
  LLT RegTy = MRI.getType(LoadReg);
  Register PtrReg = LoadMI->getPointerReg();
  unsigned RegSize = RegTy.getSizeInBits();
  LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
  unsigned MaskSizeBits = MaskVal.countr_one();

  // The mask may not be larger than the in-memory type, as it might cover
  // sign-extended bits.
  if (MaskSizeBits > LoadSizeBits.getValue())
    return false;

  // If the mask covers the whole destination register, there's nothing to
  // extend.
  if (MaskSizeBits >= RegSize)
    return false;

  // Most targets cannot deal with loads of size < 8 and need to re-legalize
  // to at least byte loads. Avoid creating such loads here.
  if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadMI->getMMO();
  LegalityQuery::MemDesc MemDesc(MMO);

  // Don't modify the memory access size if this is atomic/volatile, but we
  // can still adjust the opcode to indicate the high bit behavior.
  if (LoadMI->isSimple())
    MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
  else if (LoadSizeBits.getValue() > MaskSizeBits ||
           LoadSizeBits.getValue() == RegSize)
    return false;

  // TODO: Could check if it's legal with the reduced or original memory size.
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    B.setInstrAndDebugLoc(*LoadMI);
    auto &MF = B.getMF();
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
    LoadMI->eraseFromParent();
  };
  return true;
}

bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) const {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return true;
  const MachineBasicBlock &MBB = *DefMI.getParent();
  auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
    return &MI == &DefMI || &MI == &UseMI;
  });
  if (DefOrUse == MBB.end())
    llvm_unreachable("Block must contain both DefMI and UseMI!");
  return &*DefOrUse == &DefMI;
}

bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) const {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}

bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) const {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  Register LoadUser = SrcReg;

  if (MRI.getType(SrcReg).isVector())
    return false;

  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
    LoadUser = TruncSrc;

  uint64_t SizeInBits = MI.getOperand(2).getImm();
  // If the source is a G_SEXTLOAD from the same bit width, then we don't
  // need any extend at all, just a truncate.
  if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
    // If truncating more than the original extended value, abort.
    auto LoadSizeBits = LoadMI->getMemSizeInBits();
    if (TruncSrc &&
        MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
      return false;
    if (LoadSizeBits == SizeInBits)
      return true;
  }
  return false;
}
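
// Illustrative MIR sketch (hypothetical registers, not from the original
// source): the sign bits are already in place, so the G_SEXT_INREG is
// redundant:
//   %ld:_(s32) = G_SEXTLOAD %ptr (load 1)
//   %t:_(s16) = G_TRUNC %ld
//   %s:_(s16) = G_SEXT_INREG %t, 8
// The apply below simply emits %s = COPY %t.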

void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) const {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
}

bool CombinerHelper::matchSextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);

  Register DstReg = MI.getOperand(0).getReg();
  LLT RegTy = MRI.getType(DstReg);

  // Only supports scalars for now.
  if (RegTy.isVector())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
  if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
    return false;

  uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();

  // If the sign extend extends from a narrower width than the load's width,
  // then we can narrow the load width when we combine to a G_SEXTLOAD.
  // Avoid widening the load at all.
  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);

  // Don't generate G_SEXTLOADs with a < 1 byte width.
  if (NewSizeBits < 8)
    return false;
  // Don't bother creating a non-power-of-2 sextload, it will likely be broken
  // up anyway for most targets.
  if (!isPowerOf2_32(NewSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadDef->getMMO();
  LegalityQuery::MemDesc MMDesc(MMO);

  // Don't modify the memory access size if this is atomic/volatile, but we
  // can still adjust the opcode to indicate the high bit behavior.
  if (LoadDef->isSimple())
    MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
  else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
    return false;

  // TODO: Could check if it's legal with the reduced or original memory size.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
                                 {MRI.getType(LoadDef->getDstReg()),
                                  MRI.getType(LoadDef->getPointerReg())},
                                 {MMDesc}}))
    return false;

  MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
  return true;
}

void CombinerHelper::applySextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register LoadReg;
  unsigned ScalarSizeBits;
  std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
  GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));

  // If we have the following:
  //   %ld = G_LOAD %ptr, (load 2)
  //   %ext = G_SEXT_INREG %ld, 8
  // ==>
  //   %ld = G_SEXTLOAD %ptr (load 1)

  auto &MMO = LoadDef->getMMO();
  Builder.setInstrAndDebugLoc(*LoadDef);
  auto &MF = Builder.getMF();
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
  Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
                         LoadDef->getPointerReg(), *NewMMO);
  MI.eraseFromParent();

  // Not all loads can be deleted, so make sure the old one is removed.
  LoadDef->eraseFromParent();
}

/// Return true if 'MI' is a load or a store that may fold its address operand
/// into the load / store addressing mode.
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
                                    MachineRegisterInfo &MRI) {
  TargetLowering::AddrMode AM;
  auto *MF = MI->getMF();
  auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
  if (!Addr)
    return false;

  AM.HasBaseReg = true;
  if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
    AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
  else
    AM.Scale = 1; // [reg +/- reg]

  return TLI.isLegalAddressingMode(
      MF->getDataLayout(), AM,
      getTypeForLLT(MI->getMMO().getMemoryType(),
                    MF->getFunction().getContext()),
      MI->getMMO().getAddrSpace());
}

static unsigned getIndexedOpc(unsigned LdStOpc) {
  switch (LdStOpc) {
  case TargetOpcode::G_LOAD:
    return TargetOpcode::G_INDEXED_LOAD;
  case TargetOpcode::G_STORE:
    return TargetOpcode::G_INDEXED_STORE;
  case TargetOpcode::G_ZEXTLOAD:
    return TargetOpcode::G_INDEXED_ZEXTLOAD;
  case TargetOpcode::G_SEXTLOAD:
    return TargetOpcode::G_INDEXED_SEXTLOAD;
  default:
    llvm_unreachable("Unexpected opcode");
  }
}

bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
  // Check for legality.
  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
  LLT Ty = MRI.getType(LdSt.getReg(0));
  LLT MemTy = LdSt.getMMO().getMemoryType();
  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
      {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
        AtomicOrdering::NotAtomic}});
  unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
  SmallVector<LLT> OpTys;
  if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
    OpTys = {PtrTy, Ty, Ty};
  else
    OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD

  LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
  return isLegal(Q);
}

static cl::opt<unsigned> PostIndexUseThreshold(
    "post-index-use-threshold", cl::Hidden, cl::init(32),
    cl::desc("Number of uses of a base pointer to check before it is no longer "
             "considered for post-indexing."));

bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                            Register &Base, Register &Offset,
                                            bool &RematOffset) const {
  // We're looking for the following pattern, for either load or store:
  //   %baseptr:_(p0) = ...
  //   G_STORE %val(s64), %baseptr(p0)
  //   %offset:_(s64) = G_CONSTANT i64 -256
  //   %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
  const auto &TLI = getTargetLowering();

  Register Ptr = LdSt.getPointerReg();
  // If the store is the only use, don't bother.
  if (MRI.hasOneNonDBGUse(Ptr))
    return false;

  if (!isIndexedLoadStoreLegal(LdSt))
    return false;

  if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
    return false;

  MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
  auto *PtrDef = MRI.getVRegDef(Ptr);

  unsigned NumUsesChecked = 0;
  for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
    if (++NumUsesChecked > PostIndexUseThreshold)
      return false; // Try to avoid exploding compile time.

    auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
    // The use itself might be dead. This can happen during combines if DCE
    // hasn't had a chance to run yet. Don't allow it to form an indexed op.
    if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
      continue;

    // Check the user of this isn't the store, otherwise we'd generate an
    // indexed store defining its own use.
    if (StoredValDef == &Use)
      continue;

    Offset = PtrAdd->getOffsetReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
                             /*IsPre*/ false, MRI))
      continue;

    // Make sure the offset calculation is before the potentially indexed op.
    MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
    RematOffset = false;
    if (!dominates(*OffsetDef, LdSt)) {
      // If the offset however is just a G_CONSTANT, we can always just
      // rematerialize it where we need it.
      if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
        continue;
      RematOffset = true;
    }

    for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
      if (&BasePtrUse == PtrDef)
        continue;

      // If the user is a later load/store that can be post-indexed, then don't
      // combine this one.
      auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
      if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
          dominates(LdSt, *BasePtrLdSt) &&
          isIndexedLoadStoreLegal(*BasePtrLdSt))
        return false;

      // Now we're looking for the key G_PTR_ADD instruction, which contains
      // the offset add that we want to fold.
      if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
        Register PtrAddDefReg = BasePtrUseDef->getReg(0);
        for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
          // If the use is in a different block, then we may produce worse code
          // due to the extra register pressure.
          if (BaseUseUse.getParent() != LdSt.getParent())
            return false;

          if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
            if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
              return false;
        }
        if (!dominates(LdSt, BasePtrUse))
          return false; // All uses must be dominated by the load/store.
      }
    }

    Addr = PtrAdd->getReg(0);
    Base = PtrAdd->getBaseReg();
    return true;
  }

  return false;
}

bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                           Register &Base,
                                           Register &Offset) const {
  auto &MF = *LdSt.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  Addr = LdSt.getPointerReg();
  if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
      MRI.hasOneNonDBGUse(Addr))
    return false;

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
    return false;

  if (!isIndexedLoadStoreLegal(LdSt))
    return false;

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  if (auto *St = dyn_cast<GStore>(&LdSt)) {
    // Would require a copy.
    if (Base == St->getValueReg())
      return false;

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (St->getValueReg() == Addr)
      return false;
  }

  // Avoid increasing cross-block register pressure.
  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
    if (AddrUse.getParent() != LdSt.getParent())
      return false;

  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  // That might allow us to end base's liveness here by adjusting the constant.
  bool RealUse = false;
  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
    if (!dominates(LdSt, AddrUse))
      return false; // All uses must be dominated by the load/store.

    // If Ptr may be folded in the addressing mode of another use, then it's
    // not profitable to do this transformation.
    if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
      if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
        RealUse = true;
    } else {
      RealUse = true;
    }
  }
  return RealUse;
}
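
// Illustrative MIR sketch of a pre-index candidate (hypothetical registers,
// not from the original source):
//   %off:_(s64) = G_CONSTANT i64 16
//   %addr:_(p0) = G_PTR_ADD %base, %off
//   G_STORE %val(s32), %addr(p0)
// If the addressing mode is legal, the store and the G_PTR_ADD can later be
// folded into a single writeback G_INDEXED_STORE (IsPre = 1).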

bool CombinerHelper::matchCombineExtractedVectorLoad(
    MachineInstr &MI, BuildFnTy &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);

  // Check if there is a load that defines the vector being extracted from.
  auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
  if (!LoadMI)
    return false;

  Register Vector = MI.getOperand(1).getReg();
  LLT VecEltTy = MRI.getType(Vector).getElementType();

  assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);

  // Checking whether we should reduce the load width.
  if (!MRI.hasOneNonDBGUse(Vector))
    return false;

  // Check if the defining load is simple.
  if (!LoadMI->isSimple())
    return false;

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltTy.isByteSized())
    return false;

  // Check for load fold barriers between the extraction and the load.
  if (MI.getParent() != LoadMI->getParent())
    return false;
  const unsigned MaxIter = 20;
  unsigned Iter = 0;
  for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
    if (II->isLoadFoldBarrier())
      return false;
    if (Iter++ == MaxIter)
      return false;
  }

  // Check if the new load that we are going to create is legal
  // if we are in the post-legalization phase.
  MachineMemOperand MMO = LoadMI->getMMO();
  Align Alignment = MMO.getAlign();
  MachinePointerInfo PtrInfo;
  int64_t Offset;

  // Finding the appropriate PtrInfo if offset is a known constant.
  // This is required to create the memory operand for the narrowed load.
  // This machine memory operand object helps us infer about legality
  // before we proceed to combine the instruction.
  if (auto CVal = getIConstantVRegVal(MI.getOperand(2).getReg(), MRI)) {
    int Elt = CVal->getZExtValue();
    // FIXME: should be (ABI size)*Elt.
    Offset = VecEltTy.getSizeInBits() * Elt / 8;
    PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    Offset = VecEltTy.getSizeInBits() / 8;
    PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
  }

  Alignment = commonAlignment(Alignment, Offset);

  Register VecPtr = LoadMI->getPointerReg();
  LLT PtrTy = MRI.getType(VecPtr);

  MachineFunction &MF = *MI.getMF();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);

  LegalityQuery::MemDesc MMDesc(*NewMMO);

  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
    return false;

  // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  unsigned Fast = 0;
  if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
                                              &Fast) ||
      !Fast)
    return false;

  Register Result = MI.getOperand(0).getReg();
  Register Index = MI.getOperand(2).getReg();

  MatchInfo = [=](MachineIRBuilder &B) {
    GISelObserverWrapper DummyObserver;
    LegalizerHelper Helper(B.getMF(), DummyObserver, B);
    // Get a pointer to the vector element.
    Register finalPtr = Helper.getVectorElementPointer(
        LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
        Index);
    // New G_LOAD instruction.
    B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
    // Remove the original G_LOAD instruction.
    LoadMI->eraseFromParent();
  };

  return true;
}

bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
  auto &LdSt = cast<GLoadStore>(MI);

  if (LdSt.isAtomic())
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset, MatchInfo.RematOffset))
    return false;

  return true;
}

void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode = getIndexedOpc(Opcode);

  // If the offset constant didn't happen to dominate the load/store, we can
  // just clone it as needed.
  if (MatchInfo.RematOffset) {
    auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
    auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
                                        *OldCst->getOperand(1).getCImm());
    MatchInfo.Offset = NewCst.getReg(0);
  }

  auto MIB = Builder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
  MIB.addUse(MatchInfo.Offset);
  MIB.addImm(MatchInfo.IsPre);
  MIB->cloneMemRefs(*MI.getMF(), MI);
  MI.eraseFromParent();
  AddrDef.eraseFromParent();

  LLVM_DEBUG(dbgs() << "  Combined to indexed operation");
}
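
// Illustrative MIR sketch (hypothetical registers, not from the original
// source): for the pre-index candidate shown earlier, the rewrite emits
//   %addr:_(p0) = G_INDEXED_STORE %val(s32), %base, %off, 1
// i.e. the new address is defined by the store itself and the old G_PTR_ADD
// is erased; post-indexed forms use the same shape with 0 as the final
// operand.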

bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) const {
  unsigned Opcode = MI.getOpcode();
  bool IsDiv, IsSigned;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV: {
    IsDiv = true;
    IsSigned = Opcode == TargetOpcode::G_SDIV;
    break;
  }
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    IsDiv = false;
    IsSigned = Opcode == TargetOpcode::G_SREM;
    break;
  }
  }

  Register Src1 = MI.getOperand(1).getReg();
  unsigned DivOpcode, RemOpcode, DivremOpcode;
  if (IsSigned) {
    DivOpcode = TargetOpcode::G_SDIV;
    RemOpcode = TargetOpcode::G_SREM;
    DivremOpcode = TargetOpcode::G_SDIVREM;
  } else {
    DivOpcode = TargetOpcode::G_UDIV;
    RemOpcode = TargetOpcode::G_UREM;
    DivremOpcode = TargetOpcode::G_UDIVREM;
  }

  if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
    return false;

  // Combine:
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_

  // Combine:
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_

  for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
    if (MI.getParent() == UseMI.getParent() &&
        ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
         (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
        matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
        matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
      OtherMI = &UseMI;
      return true;
    }
  }

  return false;
}

void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) const {
  unsigned Opcode = MI.getOpcode();
  assert(OtherMI && "OtherMI shouldn't be empty.");

  Register DestDivReg, DestRemReg;
  if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
    DestDivReg = MI.getOperand(0).getReg();
    DestRemReg = OtherMI->getOperand(0).getReg();
  } else {
    DestDivReg = OtherMI->getOperand(0).getReg();
    DestRemReg = MI.getOperand(0).getReg();
  }

  bool IsSigned =
      Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;

  // Check which instruction is first in the block so we don't break def-use
  // deps by "moving" the instruction incorrectly. Also keep track of which
  // instruction is first so we pick its operands, avoiding use-before-def
  // bugs.
  MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
  Builder.setInstrAndDebugLoc(*FirstInst);

  Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
                              : TargetOpcode::G_UDIVREM,
                     {DestDivReg, DestRemReg},
                     {FirstInst->getOperand(1), FirstInst->getOperand(2)});
  MI.eraseFromParent();
  OtherMI->eraseFromParent();
}

bool CombinerHelper::matchOptBrCondByInvertingCond(
    MachineInstr &MI, MachineInstr *&BrCond) const {
  assert(MI.getOpcode() == TargetOpcode::G_BR);

  // Try to match the following:
  //   bb1:
  //     G_BRCOND %c1, %bb2
  //     G_BR %bb3
  //   bb2:
  //   ...
  //   bb3:

  // The above pattern does not have a fall through to the successor bb2,
  // always resulting in a branch no matter which path is taken. Here we try
  // to find and replace that pattern with a conditional branch to bb3 and a
  // fallthrough to bb2. This is generally better for branch predictors.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target. Also make
  // sure that it isn't the same as the G_BR's target (otherwise, this will
  // loop.)
  MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
  return BrCondTarget != MI.getOperand(0).getMBB() &&
         MBB->isLayoutSuccessor(BrCondTarget);
}

void CombinerHelper::applyOptBrCondByInvertingCond(
    MachineInstr &MI, MachineInstr *&BrCond) const {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  Builder.setInstrAndDebugLoc(*BrCond);
  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
  // FIXME: Does int/fp matter for this? If so, we might need to restrict
  // this to i1 only since we might not know for sure what kind of
  // compare generated the condition value.
  auto True = Builder.buildConstant(
      Ty, getICmpTrueVal(getTargetLowering(), false, false));
  auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);

  auto *FallthroughBB = BrCond->getOperand(1).getMBB();
  Observer.changingInstr(MI);
  MI.getOperand(0).setMBB(FallthroughBB);
  Observer.changedInstr(MI);

  // Change the conditional branch to use the inverted condition and
  // new target block.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(0).setReg(Xor.getReg(0));
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
}

bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) const {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemcpyInline(MI) ==
         LegalizerHelper::LegalizeResult::Legalized;
}

bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI,
                                            unsigned MaxLen) const {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemCpyFamily(MI, MaxLen) ==
         LegalizerHelper::LegalizeResult::Legalized;
}
1676
1678 const MachineRegisterInfo &MRI,
1679 const APFloat &Val) {
1680 APFloat Result(Val);
1681 switch (MI.getOpcode()) {
1682 default:
1683 llvm_unreachable("Unexpected opcode!");
1684 case TargetOpcode::G_FNEG: {
1685 Result.changeSign();
1686 return Result;
1687 }
1688 case TargetOpcode::G_FABS: {
1689 Result.clearSign();
1690 return Result;
1691 }
1692 case TargetOpcode::G_FCEIL:
1693 Result.roundToIntegral(APFloat::rmTowardPositive);
1694 return Result;
1695 case TargetOpcode::G_FFLOOR:
1696 Result.roundToIntegral(APFloat::rmTowardNegative);
1697 return Result;
1698 case TargetOpcode::G_INTRINSIC_TRUNC:
1699 Result.roundToIntegral(APFloat::rmTowardZero);
1700 return Result;
1701 case TargetOpcode::G_INTRINSIC_ROUND:
1702 Result.roundToIntegral(APFloat::rmNearestTiesToAway);
1703 return Result;
1704 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1705 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1706 return Result;
1707 case TargetOpcode::G_FRINT:
1708 case TargetOpcode::G_FNEARBYINT:
1709 // Use default rounding mode (round to nearest, ties to even)
1710 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1711 return Result;
1712 case TargetOpcode::G_FPEXT:
1713 case TargetOpcode::G_FPTRUNC: {
1714 bool Unused;
1715 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1716 Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1717 &Unused);
1718 return Result;
1719 }
1720 case TargetOpcode::G_FSQRT: {
1721 bool Unused;
1722 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1723 &Unused);
1724 Result = APFloat(sqrt(Result.convertToDouble()));
1725 break;
1726 }
1727 case TargetOpcode::G_FLOG2: {
1728 bool Unused;
1729 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1730 &Unused);
1731 Result = APFloat(log2(Result.convertToDouble()));
1732 break;
1733 }
1734 }
1735 // Convert the `APFloat` back to the semantics of the original value `Val`;
1736 // otherwise `buildFConstant` will assert on a size mismatch. Only `G_FSQRT`
1737 // and `G_FLOG2` reach here.
1738 bool Unused;
1739 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1740 return Result;
1741}
1742
1743void CombinerHelper::applyCombineConstantFoldFpUnary(
1744 MachineInstr &MI, const ConstantFP *Cst) const {
1745 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1746 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1747 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1748 MI.eraseFromParent();
1749}
1750
1751bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1752 PtrAddChain &MatchInfo) const {
1753 // We're trying to match the following pattern:
1754 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1755 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1756 // -->
1757 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1758
1759 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1760 return false;
1761
1762 Register Add2 = MI.getOperand(1).getReg();
1763 Register Imm1 = MI.getOperand(2).getReg();
1764 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1765 if (!MaybeImmVal)
1766 return false;
1767
1768 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1769 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1770 return false;
1771
1772 Register Base = Add2Def->getOperand(1).getReg();
1773 Register Imm2 = Add2Def->getOperand(2).getReg();
1774 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1775 if (!MaybeImm2Val)
1776 return false;
1777
1778 // Check if the new combined immediate forms an illegal addressing mode.
1779 // Do not combine if it was legal before but would get illegal.
1780 // To do so, we need to find a load/store user of the pointer to get
1781 // the access type.
1782 Type *AccessTy = nullptr;
1783 auto &MF = *MI.getMF();
1784 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1785 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1786 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1787 MF.getFunction().getContext());
1788 break;
1789 }
1790 }
1791 TargetLoweringBase::AddrMode AMNew;
1792 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1793 AMNew.BaseOffs = CombinedImm.getSExtValue();
1794 if (AccessTy) {
1795 AMNew.HasBaseReg = true;
1796 TargetLoweringBase::AddrMode AMOld;
1797 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1798 AMOld.HasBaseReg = true;
1799 unsigned AS = MRI.getType(Add2).getAddressSpace();
1800 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1801 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1802 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1803 return false;
1804 }
1805
1806 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1807 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1808 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1809 // largest signed integer that fits into the index type, which is the maximum
1810 // size of allocated objects according to the IR Language Reference.
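 // For example (illustrative): if both G_PTR_ADDs carry nuw, then
 //   %t1 = nuw G_PTR_ADD %base, 16 ; %root = nuw G_PTR_ADD %t1, 8
 // may be folded to %root = nuw G_PTR_ADD %base, 24.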
1811 unsigned PtrAddFlags = MI.getFlags();
1812 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1813 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1814 bool IsInBounds =
1815 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1816 unsigned Flags = 0;
1817 if (IsNoUWrap)
1818 Flags |= MachineInstr::MIFlag::NoUWrap;
1819 if (IsInBounds) {
1820 Flags |= MachineInstr::MIFlag::NoUSWrap;
1821 Flags |= MachineInstr::MIFlag::InBounds;
1822 }
1823
1824 // Pass the combined immediate to the apply function.
1825 MatchInfo.Imm = AMNew.BaseOffs;
1826 MatchInfo.Base = Base;
1827 MatchInfo.Bank = getRegBank(Imm2);
1828 MatchInfo.Flags = Flags;
1829 return true;
1830}
1831
1832void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1833 PtrAddChain &MatchInfo) const {
1834 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1835 MachineIRBuilder MIB(MI);
1836 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1837 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1838 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1839 Observer.changingInstr(MI);
1840 MI.getOperand(1).setReg(MatchInfo.Base);
1841 MI.getOperand(2).setReg(NewOffset.getReg(0));
1842 MI.setFlags(MatchInfo.Flags);
1843 Observer.changedInstr(MI);
1844}
1845
1846bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1847 RegisterImmPair &MatchInfo) const {
1848 // We're trying to match the following pattern with any of
1849 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1850 // %t1 = SHIFT %base, G_CONSTANT imm1
1851 // %root = SHIFT %t1, G_CONSTANT imm2
1852 // -->
1853 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1854
1855 unsigned Opcode = MI.getOpcode();
1856 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1857 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1858 Opcode == TargetOpcode::G_USHLSAT) &&
1859 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1860
1861 Register Shl2 = MI.getOperand(1).getReg();
1862 Register Imm1 = MI.getOperand(2).getReg();
1863 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1864 if (!MaybeImmVal)
1865 return false;
1866
1867 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1868 if (Shl2Def->getOpcode() != Opcode)
1869 return false;
1870
1871 Register Base = Shl2Def->getOperand(1).getReg();
1872 Register Imm2 = Shl2Def->getOperand(2).getReg();
1873 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1874 if (!MaybeImm2Val)
1875 return false;
1876
1877 // Pass the combined immediate to the apply function.
1878 MatchInfo.Imm =
1879 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1880 MatchInfo.Reg = Base;
1881
1882 // There is no simple replacement for a saturating unsigned left shift that
1883 // exceeds the scalar size.
1884 if (Opcode == TargetOpcode::G_USHLSAT &&
1885 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1886 return false;
1887
1888 return true;
1889}
1890
1891void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1892 RegisterImmPair &MatchInfo) const {
1893 unsigned Opcode = MI.getOpcode();
1894 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1895 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1896 Opcode == TargetOpcode::G_USHLSAT) &&
1897 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1898
1899 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1900 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1901 auto Imm = MatchInfo.Imm;
1902
1903 if (Imm >= ScalarSizeInBits) {
1904 // Any logical shift that exceeds scalar size will produce zero.
1905 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1906 Builder.buildConstant(MI.getOperand(0), 0);
1907 MI.eraseFromParent();
1908 return;
1909 }
1910 // Arithmetic shift and saturating signed left shift have no effect beyond
1911 // scalar size.
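 // E.g. for s32 (illustrative): (G_LSHR x, 35) was folded to 0 above,
 // while (G_ASHR x, 35) is clamped below to (G_ASHR x, 31).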
1912 Imm = ScalarSizeInBits - 1;
1913 }
1914
1915 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1916 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1917 Observer.changingInstr(MI);
1918 MI.getOperand(1).setReg(MatchInfo.Reg);
1919 MI.getOperand(2).setReg(NewImm);
1920 Observer.changedInstr(MI);
1921}
1922
1923bool CombinerHelper::matchShiftOfShiftedLogic(
1924 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1925 // We're trying to match the following pattern with any of
1926 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1927 // with any of G_AND/G_OR/G_XOR logic instructions.
1928 // %t1 = SHIFT %X, G_CONSTANT C0
1929 // %t2 = LOGIC %t1, %Y
1930 // %root = SHIFT %t2, G_CONSTANT C1
1931 // -->
1932 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1933 // %t4 = SHIFT %Y, G_CONSTANT C1
1934 // %root = LOGIC %t3, %t4
1935 unsigned ShiftOpcode = MI.getOpcode();
1936 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1937 ShiftOpcode == TargetOpcode::G_ASHR ||
1938 ShiftOpcode == TargetOpcode::G_LSHR ||
1939 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1940 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1941 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1942
1943 // Match a one-use bitwise logic op.
1944 Register LogicDest = MI.getOperand(1).getReg();
1945 if (!MRI.hasOneNonDBGUse(LogicDest))
1946 return false;
1947
1948 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1949 unsigned LogicOpcode = LogicMI->getOpcode();
1950 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1951 LogicOpcode != TargetOpcode::G_XOR)
1952 return false;
1953
1954 // Find a matching one-use shift by constant.
1955 const Register C1 = MI.getOperand(2).getReg();
1956 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1957 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1958 return false;
1959
1960 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1961
1962 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1963 // The shift should match the previous one and have only one use.
1964 if (MI->getOpcode() != ShiftOpcode ||
1965 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1966 return false;
1967
1968 // Must be a constant.
1969 auto MaybeImmVal =
1970 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1971 if (!MaybeImmVal)
1972 return false;
1973
1974 ShiftVal = MaybeImmVal->Value.getSExtValue();
1975 return true;
1976 };
1977
1978 // Logic ops are commutative, so check each operand for a match.
1979 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1980 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1981 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1982 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1983 uint64_t C0Val;
1984
1985 if (matchFirstShift(LogicMIOp1, C0Val)) {
1986 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1987 MatchInfo.Shift2 = LogicMIOp1;
1988 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1989 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1990 MatchInfo.Shift2 = LogicMIOp2;
1991 } else
1992 return false;
1993
1994 MatchInfo.ValSum = C0Val + C1Val;
1995
1996 // The fold is not valid if the sum of the shift values exceeds bitwidth.
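 // E.g. on s32 (illustrative): inner and outer shifts of 20 and 15 would
 // sum to 35 >= 32, so the fold must be rejected.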
1997 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1998 return false;
1999
2000 MatchInfo.Logic = LogicMI;
2001 return true;
2002}
2003
2004void CombinerHelper::applyShiftOfShiftedLogic(
2005 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2006 unsigned Opcode = MI.getOpcode();
2007 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2008 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2009 Opcode == TargetOpcode::G_SSHLSAT) &&
2010 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2011
2012 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2013 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2014
2015 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2016
2017 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2018 Register Shift1 =
2019 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2020
2021 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
2022 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
2023 // old shift1 when building shift2. So if we erased MatchInfo.Shift2 at the
2024 // end, we would actually remove the old shift1, causing a crash later on.
2025 // Erase it earlier to avoid the crash.
2026 MatchInfo.Shift2->eraseFromParent();
2027
2028 Register Shift2Const = MI.getOperand(2).getReg();
2029 Register Shift2 = Builder
2030 .buildInstr(Opcode, {DestType},
2031 {MatchInfo.LogicNonShiftReg, Shift2Const})
2032 .getReg(0);
2033
2034 Register Dest = MI.getOperand(0).getReg();
2035 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2036
2037 // This had only one use, so it's safe to remove it.
2038 MatchInfo.Logic->eraseFromParent();
2039
2040 MI.eraseFromParent();
2041}
2042
2043bool CombinerHelper::matchCommuteShift(MachineInstr &MI,
2044 BuildFnTy &MatchInfo) const {
2045 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2046 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2047 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2048 auto &Shl = cast<GenericMachineInstr>(MI);
2049 Register DstReg = Shl.getReg(0);
2050 Register SrcReg = Shl.getReg(1);
2051 Register ShiftReg = Shl.getReg(2);
2052 Register X, C1;
2053
2054 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2055 return false;
2056
2057 if (!mi_match(SrcReg, MRI,
2058 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
2059 m_GOr(m_Reg(X), m_Reg(C1))))))
2060 return false;
2061
2062 APInt C1Val, C2Val;
2063 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2064 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2065 return false;
2066
2067 auto *SrcDef = MRI.getVRegDef(SrcReg);
2068 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2069 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2070 LLT SrcTy = MRI.getType(SrcReg);
2071 MatchInfo = [=](MachineIRBuilder &B) {
2072 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2073 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2074 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2075 };
2076 return true;
2077}
2078
2079bool CombinerHelper::matchCombineLshrOfTruncOfLshr(MachineInstr &MI,
2080 LshrOfTruncOfLshr &MatchInfo,
2081 MachineInstr &ShiftMI) const {
2082 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2083
2084 Register N0 = MI.getOperand(1).getReg();
2085 Register N1 = MI.getOperand(2).getReg();
2086 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2087
2088 APInt N1C, N001C;
2089 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2090 return false;
2091 auto N001 = ShiftMI.getOperand(2).getReg();
2092 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2093 return false;
2094
2095 if (N001C.getBitWidth() > N1C.getBitWidth())
2096 N1C = N1C.zext(N001C.getBitWidth());
2097 else
2098 N001C = N001C.zext(N1C.getBitWidth());
2099
2100 Register InnerShift = ShiftMI.getOperand(0).getReg();
2101 LLT InnerShiftTy = MRI.getType(InnerShift);
2102 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2103 if ((N1C + N001C).ult(InnerShiftSize)) {
2104 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2105 MatchInfo.ShiftAmt = N1C + N001C;
2106 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2107 MatchInfo.InnerShiftTy = InnerShiftTy;
2108
2109 if ((N001C + OpSizeInBits) == InnerShiftSize)
2110 return true;
2111 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2112 MatchInfo.Mask = true;
2113 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2114 return true;
2115 }
2116 }
2117 return false;
2118}
2119
2120void CombinerHelper::applyCombineLshrOfTruncOfLshr(
2121 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2122 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2123
2124 Register Dst = MI.getOperand(0).getReg();
2125 auto ShiftAmt =
2126 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2127 auto Shift =
2128 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2129 if (MatchInfo.Mask) {
2130 APInt MaskVal =
2131 APInt::getLowBitsSet(MatchInfo.InnerShiftTy.getScalarSizeInBits(),
2132 MatchInfo.MaskVal.getZExtValue());
2133 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2134 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2135 Builder.buildTrunc(Dst, And);
2136 } else
2137 Builder.buildTrunc(Dst, Shift);
2138 MI.eraseFromParent();
2139}
2140
2141bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2142 unsigned &ShiftVal) const {
2143 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
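 // Fold (mul x, 2^N) -> (shl x, N) for a power-of-two constant RHS,
 // e.g. (illustrative) (G_MUL x, 8) -> (G_SHL x, 3).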
2144 auto MaybeImmVal =
2145 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2146 if (!MaybeImmVal)
2147 return false;
2148
2149 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2150 return (static_cast<int32_t>(ShiftVal) != -1);
2151}
2152
2153void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2154 unsigned &ShiftVal) const {
2155 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2156 MachineIRBuilder MIB(MI);
2157 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2158 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2159 Observer.changingInstr(MI);
2160 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2161 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2162 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2163 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
2164 Observer.changedInstr(MI);
2165}
2166
2167bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
2168 BuildFnTy &MatchInfo) const {
2169 GSub &Sub = cast<GSub>(MI);
2170
2171 LLT Ty = MRI.getType(Sub.getReg(0));
2172
2173 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2174 return false;
2175
2176 if (!isConstantLegalOrBeforeLegalizer(Ty))
2177 return false;
2178
2179 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2180
2181 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2182 auto NegCst = B.buildConstant(Ty, -Imm);
2183 Observer.changingInstr(MI);
2184 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2185 MI.getOperand(2).setReg(NegCst.getReg(0));
2186 MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
2187 if (Imm.isMinSignedValue())
2188 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
2189 Observer.changedInstr(MI);
2190 };
2191 return true;
2192}
2193
2194// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
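// E.g. (illustrative): %e:_(s64) = G_ZEXT %x:_(s32); %r = G_SHL %e, 8 can
// become %n:_(s32) = G_SHL %x, 8; %r = G_ZEXT %n, provided the top 8 bits
// of %x are known to be zero.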
2195bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2196 RegisterImmPair &MatchData) const {
2197 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2198 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2199 return false;
2200
2201 Register LHS = MI.getOperand(1).getReg();
2202
2203 Register ExtSrc;
2204 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2205 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2206 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2207 return false;
2208
2209 Register RHS = MI.getOperand(2).getReg();
2210 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2211 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2212 if (!MaybeShiftAmtVal)
2213 return false;
2214
2215 if (LI) {
2216 LLT SrcTy = MRI.getType(ExtSrc);
2217
2218 // We only really care about the legality of the shifted value. We can
2219 // pick any type for the constant shift amount, so ask the target what to
2220 // use. Otherwise we would have to guess and hope it is reported as legal.
2221 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2222 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2223 return false;
2224 }
2225
2226 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2227 MatchData.Reg = ExtSrc;
2228 MatchData.Imm = ShiftAmt;
2229
2230 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2231 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2232 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2233}
2234
2235void CombinerHelper::applyCombineShlOfExtend(
2236 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2237 Register ExtSrcReg = MatchData.Reg;
2238 int64_t ShiftAmtVal = MatchData.Imm;
2239
2240 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2241 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2242 auto NarrowShift =
2243 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2244 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2245 MI.eraseFromParent();
2246}
2247
2248bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2249 Register &MatchInfo) const {
2250 GMerge &Merge = cast<GMerge>(MI);
2251 SmallVector<Register, 16> MergedValues;
2252 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2253 MergedValues.emplace_back(Merge.getSourceReg(I));
2254
2255 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2256 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2257 return false;
2258
2259 for (unsigned I = 0; I < MergedValues.size(); ++I)
2260 if (MergedValues[I] != Unmerge->getReg(I))
2261 return false;
2262
2263 MatchInfo = Unmerge->getSourceReg();
2264 return true;
2265}
2266
2267static Register peekThroughBitcast(Register Reg,
2268 const MachineRegisterInfo &MRI) {
2269 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2270 ;
2271
2272 return Reg;
2273}
2274
2275bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2276 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2277 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2278 "Expected an unmerge");
2279 auto &Unmerge = cast<GUnmerge>(MI);
2280 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2281
2282 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2283 if (!SrcInstr)
2284 return false;
2285
2286 // Check the source type of the merge.
2287 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2288 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2289 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2290 if (SrcMergeTy != Dst0Ty && !SameSize)
2291 return false;
2292 // They are the same now (modulo a bitcast).
2293 // We can collect all the src registers.
2294 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2295 Operands.push_back(SrcInstr->getSourceReg(Idx));
2296 return true;
2297}
2298
2299void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2300 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2301 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2302 "Expected an unmerge");
2303 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2304 "Not enough operands to replace all defs");
2305 unsigned NumElems = MI.getNumOperands() - 1;
2306
2307 LLT SrcTy = MRI.getType(Operands[0]);
2308 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2309 bool CanReuseInputDirectly = DstTy == SrcTy;
2310 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2311 Register DstReg = MI.getOperand(Idx).getReg();
2312 Register SrcReg = Operands[Idx];
2313
2314 // This combine may run after RegBankSelect, so we need to be aware of
2315 // register banks.
2316 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2317 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2318 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2319 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2320 }
2321
2322 if (CanReuseInputDirectly)
2323 replaceRegWith(MRI, DstReg, SrcReg);
2324 else
2325 Builder.buildCast(DstReg, SrcReg);
2326 }
2327 MI.eraseFromParent();
2328}
2329
2330bool CombinerHelper::matchCombineUnmergeConstant(
2331 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2332 unsigned SrcIdx = MI.getNumOperands() - 1;
2333 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2334 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2335 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2336 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2337 return false;
2338 // Break down the big constant into smaller pieces.
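 // E.g. (illustrative): unmerging a s64 constant 0x1122334455667788 into
 // two s32 pieces yields 0x55667788 (low) and 0x11223344 (high).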
2339 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2340 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2341 ? CstVal.getCImm()->getValue()
2342 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2343
2344 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2345 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2346 // Unmerge a constant.
2347 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2348 Csts.emplace_back(Val.trunc(ShiftAmt));
2349 Val = Val.lshr(ShiftAmt);
2350 }
2351
2352 return true;
2353}
2354
2355void CombinerHelper::applyCombineUnmergeConstant(
2356 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2357 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2358 "Expected an unmerge");
2359 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2360 "Not enough operands to replace all defs");
2361 unsigned NumElems = MI.getNumOperands() - 1;
2362 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2363 Register DstReg = MI.getOperand(Idx).getReg();
2364 Builder.buildConstant(DstReg, Csts[Idx]);
2365 }
2366
2367 MI.eraseFromParent();
2368}
2369
2370bool CombinerHelper::matchCombineUnmergeUndef(
2371 MachineInstr &MI,
2372 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2373 unsigned SrcIdx = MI.getNumOperands() - 1;
2374 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2375 MatchInfo = [&MI](MachineIRBuilder &B) {
2376 unsigned NumElems = MI.getNumOperands() - 1;
2377 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2378 Register DstReg = MI.getOperand(Idx).getReg();
2379 B.buildUndef(DstReg);
2380 }
2381 };
2382 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2383}
2384
2385bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(
2386 MachineInstr &MI) const {
2387 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2388 "Expected an unmerge");
2389 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2390 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2391 return false;
2392 // Check that all the lanes are dead except the first one.
2393 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2394 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2395 return false;
2396 }
2397 return true;
2398}
2399
2400void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(
2401 MachineInstr &MI) const {
2402 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2403 Register Dst0Reg = MI.getOperand(0).getReg();
2404 Builder.buildTrunc(Dst0Reg, SrcReg);
2405 MI.eraseFromParent();
2406}
2407
2408bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) const {
2409 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2410 "Expected an unmerge");
2411 Register Dst0Reg = MI.getOperand(0).getReg();
2412 LLT Dst0Ty = MRI.getType(Dst0Reg);
2413 // G_ZEXT on vector applies to each lane, so it will
2414 // affect all destinations. Therefore we won't be able
2415 // to simplify the unmerge to just the first definition.
2416 if (Dst0Ty.isVector())
2417 return false;
2418 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2419 LLT SrcTy = MRI.getType(SrcReg);
2420 if (SrcTy.isVector())
2421 return false;
2422
2423 Register ZExtSrcReg;
2424 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2425 return false;
2426
2427 // Finally we can replace the first definition with
2428 // a zext of the source if the definition is big enough to hold
2429 // all of ZExtSrc bits.
2430 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2431 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2432}
2433
2434void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) const {
2435 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2436 "Expected an unmerge");
2437
2438 Register Dst0Reg = MI.getOperand(0).getReg();
2439
2440 MachineInstr *ZExtInstr =
2441 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2442 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2443 "Expecting a G_ZEXT");
2444
2445 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2446 LLT Dst0Ty = MRI.getType(Dst0Reg);
2447 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2448
2449 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2450 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2451 } else {
2452 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2453 "ZExt src doesn't fit in destination");
2454 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2455 }
2456
2457 Register ZeroReg;
2458 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2459 if (!ZeroReg)
2460 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2461 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2462 }
2463 MI.eraseFromParent();
2464}
2465
2466bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2467 unsigned TargetShiftSize,
2468 unsigned &ShiftVal) const {
2469 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2470 MI.getOpcode() == TargetOpcode::G_LSHR ||
2471 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2472
2473 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2474 if (Ty.isVector()) // TODO: support vector types.
2475 return false;
2476
2477 // Don't narrow further than the requested size.
2478 unsigned Size = Ty.getSizeInBits();
2479 if (Size <= TargetShiftSize)
2480 return false;
2481
2482 auto MaybeImmVal =
2483 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2484 if (!MaybeImmVal)
2485 return false;
2486
2487 ShiftVal = MaybeImmVal->Value.getSExtValue();
2488 return ShiftVal >= Size / 2 && ShiftVal < Size;
2489}
2490
2491void CombinerHelper::applyCombineShiftToUnmerge(
2492 MachineInstr &MI, const unsigned &ShiftVal) const {
2493 Register DstReg = MI.getOperand(0).getReg();
2494 Register SrcReg = MI.getOperand(1).getReg();
2495 LLT Ty = MRI.getType(SrcReg);
2496 unsigned Size = Ty.getSizeInBits();
2497 unsigned HalfSize = Size / 2;
2498 assert(ShiftVal >= HalfSize);
2499
2500 LLT HalfTy = LLT::scalar(HalfSize);
2501
2502 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2503 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2504
2505 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2506 Register Narrowed = Unmerge.getReg(1);
2507
2508 // dst = G_LSHR s64:x, C for C >= 32
2509 // =>
2510 // lo, hi = G_UNMERGE_VALUES x
2511 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2512
2513 if (NarrowShiftAmt != 0) {
2514 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2515 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2516 }
2517
2518 auto Zero = Builder.buildConstant(HalfTy, 0);
2519 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2520 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2521 Register Narrowed = Unmerge.getReg(0);
2522 // dst = G_SHL s64:x, C for C >= 32
2523 // =>
2524 // lo, hi = G_UNMERGE_VALUES x
2525 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2526 if (NarrowShiftAmt != 0) {
2527 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2528 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2529 }
2530
2531 auto Zero = Builder.buildConstant(HalfTy, 0);
2532 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2533 } else {
2534 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2535 auto Hi = Builder.buildAShr(
2536 HalfTy, Unmerge.getReg(1),
2537 Builder.buildConstant(HalfTy, HalfSize - 1));
2538
2539 if (ShiftVal == HalfSize) {
2540 // (G_ASHR i64:x, 32) ->
2541 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2542 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2543 } else if (ShiftVal == Size - 1) {
2544 // Don't need a second shift.
2545 // (G_ASHR i64:x, 63) ->
2546 // %narrowed = (G_ASHR hi_32(x), 31)
2547 // G_MERGE_VALUES %narrowed, %narrowed
2548 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2549 } else {
2550 auto Lo = Builder.buildAShr(
2551 HalfTy, Unmerge.getReg(1),
2552 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2553
2554 // (G_ASHR i64:x, C) ->, for C >= 32
2555 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2556 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2557 }
2558 }
2559
2560 MI.eraseFromParent();
2561}
2562
2563bool CombinerHelper::tryCombineShiftToUnmerge(
2564 MachineInstr &MI, unsigned TargetShiftAmount) const {
2565 unsigned ShiftAmt;
2566 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2567 applyCombineShiftToUnmerge(MI, ShiftAmt);
2568 return true;
2569 }
2570
2571 return false;
2572}
2573
2574bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI,
2575 Register &Reg) const {
2576 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
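 // Match the round-trip (G_INTTOPTR (G_PTRTOINT %p)) and replace the result
 // with %p directly when %p already has the destination pointer type.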
2577 Register DstReg = MI.getOperand(0).getReg();
2578 LLT DstTy = MRI.getType(DstReg);
2579 Register SrcReg = MI.getOperand(1).getReg();
2580 return mi_match(SrcReg, MRI,
2581 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2582}
2583
2584void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI,
2585 Register &Reg) const {
2586 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2587 Register DstReg = MI.getOperand(0).getReg();
2588 Builder.buildCopy(DstReg, Reg);
2589 MI.eraseFromParent();
2590}
2591
2592void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI,
2593 Register &Reg) const {
2594 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2595 Register DstReg = MI.getOperand(0).getReg();
2596 Builder.buildZExtOrTrunc(DstReg, Reg);
2597 MI.eraseFromParent();
2598}
2599
2600bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2601 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2602 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2603 Register LHS = MI.getOperand(1).getReg();
2604 Register RHS = MI.getOperand(2).getReg();
2605 LLT IntTy = MRI.getType(LHS);
2606
2607 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2608 // instruction.
2609 PtrReg.second = false;
2610 for (Register SrcReg : {LHS, RHS}) {
2611 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2612 // Don't handle cases where the integer is implicitly converted to the
2613 // pointer width.
2614 LLT PtrTy = MRI.getType(PtrReg.first);
2615 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2616 return true;
2617 }
2618
2619 PtrReg.second = true;
2620 }
2621
2622 return false;
2623}
2624
2625void CombinerHelper::applyCombineAddP2IToPtrAdd(
2626 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2627 Register Dst = MI.getOperand(0).getReg();
2628 Register LHS = MI.getOperand(1).getReg();
2629 Register RHS = MI.getOperand(2).getReg();
2630
2631 const bool DoCommute = PtrReg.second;
2632 if (DoCommute)
2633 std::swap(LHS, RHS);
2634 LHS = PtrReg.first;
2635
2636 LLT PtrTy = MRI.getType(LHS);
2637
2638 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2639 Builder.buildPtrToInt(Dst, PtrAdd);
2640 MI.eraseFromParent();
2641}
2642
2643bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2644 APInt &NewCst) const {
2645 auto &PtrAdd = cast<GPtrAdd>(MI);
2646 Register LHS = PtrAdd.getBaseReg();
2647 Register RHS = PtrAdd.getOffsetReg();
2648 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2649
2650 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2651 APInt Cst;
2652 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2653 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2654 // G_INTTOPTR uses zero-extension
2655 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2656 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2657 return true;
2658 }
2659 }
2660
2661 return false;
2662}
2663
2664void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2665 APInt &NewCst) const {
2666 auto &PtrAdd = cast<GPtrAdd>(MI);
2667 Register Dst = PtrAdd.getReg(0);
2668
2669 Builder.buildConstant(Dst, NewCst);
2670 PtrAdd.eraseFromParent();
2671}
2672
2673bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI,
2674 Register &Reg) const {
2675 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2676 Register DstReg = MI.getOperand(0).getReg();
2677 Register SrcReg = MI.getOperand(1).getReg();
2678 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2679 if (OriginalSrcReg.isValid())
2680 SrcReg = OriginalSrcReg;
2681 LLT DstTy = MRI.getType(DstReg);
2682 return mi_match(SrcReg, MRI,
2683 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2684 canReplaceReg(DstReg, Reg, MRI);
2685}
2686
2687bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
2688 Register &Reg) const {
2689 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2690 Register DstReg = MI.getOperand(0).getReg();
2691 Register SrcReg = MI.getOperand(1).getReg();
2692 LLT DstTy = MRI.getType(DstReg);
2693 if (mi_match(SrcReg, MRI,
2694 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2695 canReplaceReg(DstReg, Reg, MRI)) {
2696 unsigned DstSize = DstTy.getScalarSizeInBits();
2697 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2698 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2699 }
2700 return false;
2701}
2702
2703static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2704 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2705 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2706
2707 // ShiftTy > 32 > TruncTy -> 32
2708 if (ShiftSize > 32 && TruncSize < 32)
2709 return ShiftTy.changeElementSize(32);
2710
2711 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2712 // Some targets like it, some don't, some only like it under certain
2713 // conditions/processor versions, etc.
2714 // A TargetLowering hook might be needed for this.
2715
2716 // Don't combine
2717 return ShiftTy;
2718}
2719
2720bool CombinerHelper::matchCombineTruncOfShift(
2721 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2722 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2723 Register DstReg = MI.getOperand(0).getReg();
2724 Register SrcReg = MI.getOperand(1).getReg();
2725
2726 if (!MRI.hasOneNonDBGUse(SrcReg))
2727 return false;
2728
2729 LLT SrcTy = MRI.getType(SrcReg);
2730 LLT DstTy = MRI.getType(DstReg);
2731
2732 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2733 const auto &TL = getTargetLowering();
2734
2735 LLT NewShiftTy;
2736 switch (SrcMI->getOpcode()) {
2737 default:
2738 return false;
2739 case TargetOpcode::G_SHL: {
2740 NewShiftTy = DstTy;
2741
2742 // Make sure new shift amount is legal.
2743 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2744 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2745 return false;
2746 break;
2747 }
2748 case TargetOpcode::G_LSHR:
2749 case TargetOpcode::G_ASHR: {
2750 // For right shifts, we conservatively do not do the transform if the TRUNC
2751 // has any STORE users. The reason is that if we change the type of the
2752 // shift, we may break the truncstore combine.
2753 //
2754 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2755 for (auto &User : MRI.use_instructions(DstReg))
2756 if (User.getOpcode() == TargetOpcode::G_STORE)
2757 return false;
2758
2759 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2760 if (NewShiftTy == SrcTy)
2761 return false;
2762
2763 // Make sure we won't lose information by truncating the high bits.
2764 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2765 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2766 DstTy.getScalarSizeInBits()))
2767 return false;
2768 break;
2769 }
2770 }
2771
2772 if (!isLegalOrBeforeLegalizer(
2773 {SrcMI->getOpcode(),
2774 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2775 return false;
2776
2777 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2778 return true;
2779}
2780
2781void CombinerHelper::applyCombineTruncOfShift(
2782 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2783 MachineInstr *ShiftMI = MatchInfo.first;
2784 LLT NewShiftTy = MatchInfo.second;
2785
2786 Register Dst = MI.getOperand(0).getReg();
2787 LLT DstTy = MRI.getType(Dst);
2788
2789 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2790 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2791 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2792
2793 Register NewShift =
2794 Builder
2795 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2796 .getReg(0);
2797
2798 if (NewShiftTy == DstTy)
2799 replaceRegWith(MRI, Dst, NewShift);
2800 else
2801 Builder.buildTrunc(Dst, NewShift);
2802
2803 eraseInst(MI);
2804}
2805
2806bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) const {
2807 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2808 return MO.isReg() &&
2809 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2810 });
2811}
2812
2813bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) const {
2814 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2815 return !MO.isReg() ||
2816 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2817 });
2818}
2819
2820bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) const {
2821 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2822 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2823 return all_of(Mask, [](int Elt) { return Elt < 0; });
2824}
2825
2826bool CombinerHelper::matchUndefStore(MachineInstr &MI) const {
2827 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2828 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2829 MRI);
2830}
2831
2832bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) const {
2833 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2834 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2835 MRI);
2836}
2837
2838bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(
2839 MachineInstr &MI) const {
2840 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2841 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2842 "Expected an insert/extract element op");
2843 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2844 if (VecTy.isScalableVector())
2845 return false;
2846
2847 unsigned IdxIdx =
2848 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2849 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2850 if (!Idx)
2851 return false;
2852 return Idx->getZExtValue() >= VecTy.getNumElements();
2853}
2854
2855bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI,
2856 unsigned &OpIdx) const {
2857 GSelect &SelMI = cast<GSelect>(MI);
2858 auto Cst =
2859 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2860 if (!Cst)
2861 return false;
2862 OpIdx = Cst->isZero() ? 3 : 2;
2863 return true;
2864}
2865
2866void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2867
2868bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2869 const MachineOperand &MOP2) const {
2870 if (!MOP1.isReg() || !MOP2.isReg())
2871 return false;
2872 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2873 if (!InstAndDef1)
2874 return false;
2875 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2876 if (!InstAndDef2)
2877 return false;
2878 MachineInstr *I1 = InstAndDef1->MI;
2879 MachineInstr *I2 = InstAndDef2->MI;
2880
2881 // Handle a case like this:
2882 //
2883 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2884 //
2885 // Even though %0 and %1 are produced by the same instruction they are not
2886 // the same values.
2887 if (I1 == I2)
2888 return MOP1.getReg() == MOP2.getReg();
2889
2890 // If we have an instruction which loads or stores, we can't guarantee that
2891 // it is identical.
2892 //
2893 // For example, we may have
2894 //
2895 // %x1 = G_LOAD %addr (load N from @somewhere)
2896 // ...
2897 // call @foo
2898 // ...
2899 // %x2 = G_LOAD %addr (load N from @somewhere)
2900 // ...
2901 // %or = G_OR %x1, %x2
2902 //
2903 // It's possible that @foo will modify whatever lives at the address we're
2904 // loading from. To be safe, let's just assume that all loads and stores
2905 // are different (unless we have something which is guaranteed to not
2906 // change.)
2907 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2908 return false;
2909
2910 // If both instructions are loads or stores, they are equal only if both
2911 // are dereferenceable invariant loads with the same number of bits.
2912 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2913 auto *LS1 = dyn_cast<GLoadStore>(I1);
2914 auto *LS2 = dyn_cast<GLoadStore>(I2);
2915 if (!LS1 || !LS2)
2916 return false;
2917
2918 if (!I2->isDereferenceableInvariantLoad() ||
2919 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2920 return false;
2921 }
2922
2923 // Check for physical registers on the instructions first to avoid cases
2924 // like this:
2925 //
2926 // %a = COPY $physreg
2927 // ...
2928 // SOMETHING implicit-def $physreg
2929 // ...
2930 // %b = COPY $physreg
2931 //
2932 // These copies are not equivalent.
2933 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2934 return MO.isReg() && MO.getReg().isPhysical();
2935 })) {
2936 // Check if we have a case like this:
2937 //
2938 // %a = COPY $physreg
2939 // %b = COPY %a
2940 //
2941 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2942 // From that, we know that they must have the same value, since they must
2943 // have come from the same COPY.
2944 return I1->isIdenticalTo(*I2);
2945 }
2946
2947 // We don't have any physical registers, so we don't necessarily need the
2948 // same vreg defs.
2949 //
2950 // On the off-chance that there's some target instruction feeding into the
2951 // instruction, let's use produceSameValue instead of isIdenticalTo.
2952 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2953 // Handle instructions with multiple defs that produce the same values.
2954 // Values are the same for operands with the same index.
2955 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2956 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2957 // I1 and I2 are different instructions but produce the same values, so
2958 // %1 and %6 are the same, while %1 and %7 are not the same value.
2959 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2960 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2961 }
2962 return false;
2963}
2964
2965bool CombinerHelper::matchConstantOp(const MachineOperand &MOP,
2966 int64_t C) const {
2967 if (!MOP.isReg())
2968 return false;
2969 auto *MI = MRI.getVRegDef(MOP.getReg());
2970 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2971 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2972 MaybeCst->getSExtValue() == C;
2973}
2974
2975bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP,
2976 double C) const {
2977 if (!MOP.isReg())
2978 return false;
2979 std::optional<FPValueAndVReg> MaybeCst;
2980 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2981 return false;
2982
2983 return MaybeCst->Value.isExactlyValue(C);
2984}
2985
2986void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2987 unsigned OpIdx) const {
2988 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2989 Register OldReg = MI.getOperand(0).getReg();
2990 Register Replacement = MI.getOperand(OpIdx).getReg();
2991 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2992 replaceRegWith(MRI, OldReg, Replacement);
2993 MI.eraseFromParent();
2994}
2995
2996void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2997 Register Replacement) const {
2998 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2999 Register OldReg = MI.getOperand(0).getReg();
3000 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3001 replaceRegWith(MRI, OldReg, Replacement);
3002 MI.eraseFromParent();
3003}
3004
3005bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
3006 unsigned ConstIdx) const {
3007 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3008 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3009
3010 // Get the shift amount
3011 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3012 if (!VRegAndVal)
3013 return false;
3014
3015 // Return true if the shift amount is >= the bitwidth.
3016 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3017}
3018
3019void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) const {
3020 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3021 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3022 "This is not a funnel shift operation");
3023
3024 Register ConstReg = MI.getOperand(3).getReg();
3025 LLT ConstTy = MRI.getType(ConstReg);
3026 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3027
3028 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3029 assert((VRegAndVal) && "Value is not a constant");
3030
3031 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
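 // E.g. (illustrative): on s32, G_FSHL %x, %y, 37 is equivalent to
 // G_FSHL %x, %y, 5, since 37 urem 32 == 5.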
3032 APInt NewConst = VRegAndVal->Value.urem(
3033 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3034
3035 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3036 Builder.buildInstr(
3037 MI.getOpcode(), {MI.getOperand(0)},
3038 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3039
3040 MI.eraseFromParent();
3041}
3042
3043bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) const {
3044 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3045 // Match (cond ? x : x)
3046 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3047 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3048 MRI);
3049}
3050
3051bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) const {
3052 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3053 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3054 MRI);
3055}
3056
3057bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI,
3058 unsigned OpIdx) const {
3059 MachineOperand &MO = MI.getOperand(OpIdx);
3060 return MO.isReg() &&
3061 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3062}
3063
3064bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(
3065 const MachineOperand &MO, bool OrNegative) const {
3066 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT, OrNegative);
3067}
3068
3069void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
3070 double C) const {
3071 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3072 Builder.buildFConstant(MI.getOperand(0), C);
3073 MI.eraseFromParent();
3074}
3075
3076void CombinerHelper::replaceInstWithConstant(MachineInstr &MI,
3077 int64_t C) const {
3078 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3079 Builder.buildConstant(MI.getOperand(0), C);
3080 MI.eraseFromParent();
3081}
3082
3083void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) const {
3084 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3085 Builder.buildConstant(MI.getOperand(0), C);
3086 MI.eraseFromParent();
3087}
3088
3089void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
3090 ConstantFP *CFP) const {
3091 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3092 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3093 MI.eraseFromParent();
3094}
3095
3096void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) const {
3097 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3098 Builder.buildUndef(MI.getOperand(0));
3099 MI.eraseFromParent();
3100}
3101
3102bool CombinerHelper::matchSimplifyAddToSub(
3103 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3104 Register LHS = MI.getOperand(1).getReg();
3105 Register RHS = MI.getOperand(2).getReg();
3106 Register &NewLHS = std::get<0>(MatchInfo);
3107 Register &NewRHS = std::get<1>(MatchInfo);
3108
3109 // Helper lambda to check for opportunities for
3110 // ((0-A) + B) -> B - A
3111 // (A + (0-B)) -> A - B
3112 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3113 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3114 return false;
3115 NewLHS = MaybeNewLHS;
3116 return true;
3117 };
3118
3119 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3120}
3121
3122bool CombinerHelper::matchCombineInsertVecElts(
3123 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3124 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3125 "Invalid opcode");
3126 Register DstReg = MI.getOperand(0).getReg();
3127 LLT DstTy = MRI.getType(DstReg);
3128 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3129
3130 if (DstTy.isScalableVector())
3131 return false;
3132
3133 unsigned NumElts = DstTy.getNumElements();
3134 // If this MI is part of a sequence of insert_vec_elts, then
3135 // don't do the combine in the middle of the sequence.
3136 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3137 TargetOpcode::G_INSERT_VECTOR_ELT)
3138 return false;
3139 MachineInstr *CurrInst = &MI;
3140 MachineInstr *TmpInst;
3141 int64_t IntImm;
3142 Register TmpReg;
3143 MatchInfo.resize(NumElts);
3144 while (mi_match(
3145 CurrInst->getOperand(0).getReg(), MRI,
3146 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3147 if (IntImm >= NumElts || IntImm < 0)
3148 return false;
3149 if (!MatchInfo[IntImm])
3150 MatchInfo[IntImm] = TmpReg;
3151 CurrInst = TmpInst;
3152 }
3153 // Variable index.
3154 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3155 return false;
3156 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3157 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3158 if (!MatchInfo[I - 1].isValid())
3159 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3160 }
3161 return true;
3162 }
3163 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3164 // overwritten, bail out.
3165 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3166 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3167}
3168
3169void CombinerHelper::applyCombineInsertVecElts(
3170 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3171 Register UndefReg;
3172 auto GetUndef = [&]() {
3173 if (UndefReg)
3174 return UndefReg;
3175 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3176 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3177 return UndefReg;
3178 };
3179 for (Register &Reg : MatchInfo) {
3180 if (!Reg)
3181 Reg = GetUndef();
3182 }
3183 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3184 MI.eraseFromParent();
3185}
3186
3187void CombinerHelper::applySimplifyAddToSub(
3188 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3189 Register SubLHS, SubRHS;
3190 std::tie(SubLHS, SubRHS) = MatchInfo;
3191 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3192 MI.eraseFromParent();
3193}
3194
3195bool CombinerHelper::matchBinopWithNegInner(Register MInner, Register Other,
3196 unsigned RootOpc, Register Dst,
3197 LLT Ty,
3198 BuildFnTy &MatchInfo) const {
3199 /// Helper function for matchBinopWithNeg: tries to match one commuted form
3200 /// of `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`.
3201 MachineInstr *InnerDef = MRI.getVRegDef(MInner);
3202 if (!InnerDef)
3203 return false;
3204
3205 unsigned InnerOpc = InnerDef->getOpcode();
3206 if (InnerOpc != TargetOpcode::G_ADD && InnerOpc != TargetOpcode::G_SUB)
3207 return false;
3208
3209 if (!MRI.hasOneNonDBGUse(MInner))
3210 return false;
3211
3212 Register InnerLHS = InnerDef->getOperand(1).getReg();
3213 Register InnerRHS = InnerDef->getOperand(2).getReg();
3214 Register NotSrc;
3215 Register B, C;
3216
3217 // Check if either operand is ~b
3218 auto TryMatch = [&](Register MaybeNot, Register Other) {
3219 if (mi_match(MaybeNot, MRI, m_Not(m_Reg(NotSrc)))) {
3220 if (!MRI.hasOneNonDBGUse(MaybeNot))
3221 return false;
3222 B = NotSrc;
3223 C = Other;
3224 return true;
3225 }
3226 return false;
3227 };
3228
3229 if (!TryMatch(InnerLHS, InnerRHS) && !TryMatch(InnerRHS, InnerLHS))
3230 return false;
3231
3232 // Flip add/sub
3233 unsigned FlippedOpc = (InnerOpc == TargetOpcode::G_ADD) ? TargetOpcode::G_SUB
3234 : TargetOpcode::G_ADD;
3235
3236 Register A = Other;
3237 MatchInfo = [=](MachineIRBuilder &Builder) {
3238 auto NewInner = Builder.buildInstr(FlippedOpc, {Ty}, {B, C});
3239 auto NewNot = Builder.buildNot(Ty, NewInner);
3240 Builder.buildInstr(RootOpc, {Dst}, {A, NewNot});
3241 };
3242 return true;
3243}
3244
3245bool CombinerHelper::matchBinopWithNeg(MachineInstr &MI,
3246 BuildFnTy &MatchInfo) const {
3247 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
3248 // Root MI is one of G_AND, G_OR, G_XOR.
3249 // We also look for commuted forms of the operation. The pattern shouldn't
3250 // apply if the inner operations have more than one use.
3251
3252 unsigned RootOpc = MI.getOpcode();
3253 Register Dst = MI.getOperand(0).getReg();
3254 LLT Ty = MRI.getType(Dst);
3255
3256 Register LHS = MI.getOperand(1).getReg();
3257 Register RHS = MI.getOperand(2).getReg();
3258 // Check the commuted and uncommuted forms of the operation.
3259 return matchBinopWithNegInner(LHS, RHS, RootOpc, Dst, Ty, MatchInfo) ||
3260 matchBinopWithNegInner(RHS, LHS, RootOpc, Dst, Ty, MatchInfo);
3261}
3262
3263bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3264 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3265 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3266 //
3267 // Creates the new hand + logic instruction (but does not insert them.)
3268 //
3269 // On success, MatchInfo is populated with the new instructions. These are
3270 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3271 unsigned LogicOpcode = MI.getOpcode();
3272 assert(LogicOpcode == TargetOpcode::G_AND ||
3273 LogicOpcode == TargetOpcode::G_OR ||
3274 LogicOpcode == TargetOpcode::G_XOR);
3275 MachineIRBuilder MIB(MI);
3276 Register Dst = MI.getOperand(0).getReg();
3277 Register LHSReg = MI.getOperand(1).getReg();
3278 Register RHSReg = MI.getOperand(2).getReg();
3279
3280 // Don't recompute anything.
3281 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3282 return false;
3283
3284 // Make sure we have (hand x, ...), (hand y, ...)
3285 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3286 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3287 if (!LeftHandInst || !RightHandInst)
3288 return false;
3289 unsigned HandOpcode = LeftHandInst->getOpcode();
3290 if (HandOpcode != RightHandInst->getOpcode())
3291 return false;
3292 if (LeftHandInst->getNumOperands() < 2 ||
3293 !LeftHandInst->getOperand(1).isReg() ||
3294 RightHandInst->getNumOperands() < 2 ||
3295 !RightHandInst->getOperand(1).isReg())
3296 return false;
3297
3298 // Make sure the types match up, and if we're doing this post-legalization,
3299 // we end up with legal types.
3300 Register X = LeftHandInst->getOperand(1).getReg();
3301 Register Y = RightHandInst->getOperand(1).getReg();
3302 LLT XTy = MRI.getType(X);
3303 LLT YTy = MRI.getType(Y);
3304 if (!XTy.isValid() || XTy != YTy)
3305 return false;
3306
3307 // Optional extra source register.
3308 Register ExtraHandOpSrcReg;
3309 switch (HandOpcode) {
3310 default:
3311 return false;
3312 case TargetOpcode::G_ANYEXT:
3313 case TargetOpcode::G_SEXT:
3314 case TargetOpcode::G_ZEXT: {
3315 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3316 break;
3317 }
3318 case TargetOpcode::G_TRUNC: {
3319 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3320 const MachineFunction *MF = MI.getMF();
3321 LLVMContext &Ctx = MF->getFunction().getContext();
3322
3323 LLT DstTy = MRI.getType(Dst);
3324 const TargetLowering &TLI = getTargetLowering();
3325
3326 // Be extra careful sinking truncate. If it's free, there's no benefit in
3327 // widening a binop.
3328 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3329 return false;
3330 break;
3331 }
3332 case TargetOpcode::G_AND:
3333 case TargetOpcode::G_ASHR:
3334 case TargetOpcode::G_LSHR:
3335 case TargetOpcode::G_SHL: {
3336 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3337 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3338 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3339 return false;
3340 ExtraHandOpSrcReg = ZOp.getReg();
3341 break;
3342 }
3343 }
3344
3345 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3346 return false;
3347
3348 // Record the steps to build the new instructions.
3349 //
3350 // Steps to build (logic x, y)
3351 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3352 OperandBuildSteps LogicBuildSteps = {
3353 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3354 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3355 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3356 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3357
3358 // Steps to build hand (logic x, y), ...z
3359 OperandBuildSteps HandBuildSteps = {
3360 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3361 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3362 if (ExtraHandOpSrcReg.isValid())
3363 HandBuildSteps.push_back(
3364 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3365 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3366
3367 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3368 return true;
3369}
3370
3371void CombinerHelper::applyBuildInstructionSteps(
3372 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3373 assert(MatchInfo.InstrsToBuild.size() &&
3374 "Expected at least one instr to build?");
3375 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3376 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3377 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3378 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3379 for (auto &OperandFn : InstrToBuild.OperandFns)
3380 OperandFn(Instr);
3381 }
3382 MI.eraseFromParent();
3383}
3384
3385bool CombinerHelper::matchAshrShlToSextInreg(
3386 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3387 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
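// Fold (ashr (shl x, C), C) -> (sext_inreg x, Width - C). E.g. on s32 with
// C = 24, this sign-extends the low 8 bits of x.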
3388 int64_t ShlCst, AshrCst;
3389 Register Src;
3390 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3391 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3392 m_ICstOrSplat(AshrCst))))
3393 return false;
3394 if (ShlCst != AshrCst)
3395 return false;
3396 if (!isLegalOrBeforeLegalizer(
3397 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3398 return false;
3399 MatchInfo = std::make_tuple(Src, ShlCst);
3400 return true;
3401}
3402
3403void CombinerHelper::applyAshShlToSextInreg(
3404 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3405 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3406 Register Src;
3407 int64_t ShiftAmt;
3408 std::tie(Src, ShiftAmt) = MatchInfo;
3409 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3410 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3411 MI.eraseFromParent();
3412}
3413
3414/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3415bool CombinerHelper::matchOverlappingAnd(
3416 MachineInstr &MI,
3417 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3418 assert(MI.getOpcode() == TargetOpcode::G_AND);
3419
3420 Register Dst = MI.getOperand(0).getReg();
3421 LLT Ty = MRI.getType(Dst);
3422
3423 Register R;
3424 int64_t C1;
3425 int64_t C2;
3426 if (!mi_match(
3427 Dst, MRI,
3428 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3429 return false;
3430
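// E.g. and(and(x, 0x0F), 0x03) -> and(x, 0x03), while
// and(and(x, 0x0F), 0xF0) -> 0 because the two masks share no bits.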
3431 MatchInfo = [=](MachineIRBuilder &B) {
3432 if (C1 & C2) {
3433 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3434 return;
3435 }
3436 auto Zero = B.buildConstant(Ty, 0);
3437 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3438 };
3439 return true;
3440}
3441
3442bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3443 Register &Replacement) const {
3444 // Given
3445 //
3446 // %y:_(sN) = G_SOMETHING
3447 // %x:_(sN) = G_SOMETHING
3448 // %res:_(sN) = G_AND %x, %y
3449 //
3450 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3451 //
3452 // Patterns like this can appear as a result of legalization. E.g.
3453 //
3454 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3455 // %one:_(s32) = G_CONSTANT i32 1
3456 // %and:_(s32) = G_AND %cmp, %one
3457 //
3458 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3459 assert(MI.getOpcode() == TargetOpcode::G_AND);
3460 if (!VT)
3461 return false;
3462
3463 Register AndDst = MI.getOperand(0).getReg();
3464 Register LHS = MI.getOperand(1).getReg();
3465 Register RHS = MI.getOperand(2).getReg();
3466
3467 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3468 // we can't do anything. If we do, then it depends on whether we have
3469 // KnownBits on the LHS.
3470 KnownBits RHSBits = VT->getKnownBits(RHS);
3471 if (RHSBits.isUnknown())
3472 return false;
3473
3474 KnownBits LHSBits = VT->getKnownBits(LHS);
3475
3476 // Check that x & Mask == x.
3477 // x & 1 == x, always
3478 // x & 0 == x, only if x is also 0
3479 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
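// E.g. if %x is a G_ZEXT from s8 (upper 24 bits known zero) and the RHS is
// the constant 0xFF, then x & 0xFF == x and the G_AND can be removed.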
3480 //
3481 // Check if we can replace AndDst with the LHS of the G_AND
3482 if (canReplaceReg(AndDst, LHS, MRI) &&
3483 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3484 Replacement = LHS;
3485 return true;
3486 }
3487
3488 // Check if we can replace AndDst with the RHS of the G_AND
3489 if (canReplaceReg(AndDst, RHS, MRI) &&
3490 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3491 Replacement = RHS;
3492 return true;
3493 }
3494
3495 return false;
3496}
3497
3498bool CombinerHelper::matchRedundantOr(MachineInstr &MI,
3499 Register &Replacement) const {
3500 // Given
3501 //
3502 // %y:_(sN) = G_SOMETHING
3503 // %x:_(sN) = G_SOMETHING
3504 // %res:_(sN) = G_OR %x, %y
3505 //
3506 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3507 assert(MI.getOpcode() == TargetOpcode::G_OR);
3508 if (!VT)
3509 return false;
3510
3511 Register OrDst = MI.getOperand(0).getReg();
3512 Register LHS = MI.getOperand(1).getReg();
3513 Register RHS = MI.getOperand(2).getReg();
3514
3515 KnownBits LHSBits = VT->getKnownBits(LHS);
3516 KnownBits RHSBits = VT->getKnownBits(RHS);
3517
3518 // Check that x | Mask == x.
3519 // x | 0 == x, always
3520 // x | 1 == x, only if x is also 1
3521 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
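// E.g. if the RHS is known to be all zeros, then x | 0 == x and the G_OR
// can be replaced by its LHS.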
3522 //
3523 // Check if we can replace OrDst with the LHS of the G_OR
3524 if (canReplaceReg(OrDst, LHS, MRI) &&
3525 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3526 Replacement = LHS;
3527 return true;
3528 }
3529
3530 // Check if we can replace OrDst with the RHS of the G_OR
3531 if (canReplaceReg(OrDst, RHS, MRI) &&
3532 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3533 Replacement = RHS;
3534 return true;
3535 }
3536
3537 return false;
3538}
3539
3540bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) const {
3541 // If the input is already sign extended, just drop the extension.
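// E.g. on s32, G_SEXT_INREG %src, 8 is redundant when %src is already known
// to have at least 32 - 8 + 1 = 25 sign bits.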
3542 Register Src = MI.getOperand(1).getReg();
3543 unsigned ExtBits = MI.getOperand(2).getImm();
3544 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3545 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3546}
3547
3548static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3549 int64_t Cst, bool IsVector, bool IsFP) {
3550 // For i1, Cst will always be -1 regardless of boolean contents.
3551 return (ScalarSizeBits == 1 && Cst == -1) ||
3552 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3553}
3554
3555// This pattern aims to match the following shape to avoid extra mov
3556// instructions
3557// G_BUILD_VECTOR(
3558// G_UNMERGE_VALUES(src, 0)
3559// G_UNMERGE_VALUES(src, 1)
3560// G_IMPLICIT_DEF
3561// G_IMPLICIT_DEF
3562// )
3563// ->
3564// G_CONCAT_VECTORS(
3565// src,
3566// undef
3567// )
3570 Register &UnmergeSrc) const {
3571 auto &BV = cast<GBuildVector>(MI);
3572
3573 unsigned BuildUseCount = BV.getNumSources();
3574 if (BuildUseCount % 2 != 0)
3575 return false;
3576
3577 unsigned NumUnmerge = BuildUseCount / 2;
3578
3579 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3580
3581 // Check the first operand is an unmerge and has the correct number of
3582 // operands
3583 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3584 return false;
3585
3586 UnmergeSrc = Unmerge->getSourceReg();
3587
3588 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3589 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3590
3591 if (!UnmergeSrcTy.isVector())
3592 return false;
3593
3594 // Ensure we only generate legal instructions post-legalizer
3595 if (!IsPreLegalize &&
3596 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3597 return false;
3598
3599 // Check that all of the operands before the midpoint come from the same
3600 // unmerge and are in the same order as they are used in the build_vector
3601 for (unsigned I = 0; I < NumUnmerge; ++I) {
3602 auto MaybeUnmergeReg = BV.getSourceReg(I);
3603 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3604
3605 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3606 return false;
3607
3608 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3609 return false;
3610 }
3611
3612 // Check that all of the unmerged values are used
3613 if (Unmerge->getNumDefs() != NumUnmerge)
3614 return false;
3615
3616 // Check that all of the operands after the mid point are undefs.
3617 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
3618 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3619
3620 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3621 return false;
3622 }
3623
3624 return true;
3625}
3626
3627void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
3628 MachineRegisterInfo &MRI,
3629 MachineIRBuilder &B,
3630 Register &UnmergeSrc) const {
3631 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3632 B.setInstrAndDebugLoc(MI);
3633
3634 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3635 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3636
3637 MI.eraseFromParent();
3638}
3639
3640// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3641// using vector truncates instead
3642//
3643// EXAMPLE:
3644 // %a(s32), %b(s32) = G_UNMERGE_VALUES %src(<2 x s32>)
3645 // %T_a(s16) = G_TRUNC %a(s32)
3646 // %T_b(s16) = G_TRUNC %b(s32)
3647 // %Undef(s16) = G_IMPLICIT_DEF
3648 // %dst(<4 x s16>) = G_BUILD_VECTOR %T_a(s16), %T_b(s16), %Undef(s16), %Undef(s16)
3649 //
3650 // ===>
3651 // %Undef(<2 x s32>) = G_IMPLICIT_DEF
3652 // %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %Undef(<2 x s32>)
3653 // %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3654//
3655// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3656bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
3657 Register &MatchInfo) const {
3658 auto BuildMI = cast<GBuildVector>(&MI);
3659 unsigned NumOperands = BuildMI->getNumSources();
3660 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3661
3662 // Check the G_BUILD_VECTOR sources
3663 unsigned I;
3664 MachineInstr *UnmergeMI = nullptr;
3665
3666 // Check all source TRUNCs come from the same UNMERGE instruction
3667 // and that the element order matches (BUILD_VECTOR position I
3668 // corresponds to UNMERGE result I)
3669 for (I = 0; I < NumOperands; ++I) {
3670 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3671 auto SrcMIOpc = SrcMI->getOpcode();
3672
3673 // Check if the G_TRUNC instructions all come from the same MI
3674 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3675 Register TruncSrcReg = SrcMI->getOperand(1).getReg();
3676 if (!UnmergeMI) {
3677 UnmergeMI = MRI.getVRegDef(TruncSrcReg);
3678 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3679 return false;
3680 } else {
3681 auto UnmergeSrcMI = MRI.getVRegDef(TruncSrcReg);
3682 if (UnmergeMI != UnmergeSrcMI)
3683 return false;
3684 }
3685 // Verify element ordering: BUILD_VECTOR position I must use
3686 // UNMERGE result I, otherwise the fold would lose element reordering
3687 if (UnmergeMI->getOperand(I).getReg() != TruncSrcReg)
3688 return false;
3689 } else {
3690 break;
3691 }
3692 }
3693 if (I < 2)
3694 return false;
3695
3696 // Check the remaining source elements are only G_IMPLICIT_DEF
3697 for (; I < NumOperands; ++I) {
3698 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3699 auto SrcMIOpc = SrcMI->getOpcode();
3700
3701 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3702 return false;
3703 }
3704
3705 // Check the size of unmerge source
3706 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3707 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3708 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3709 return false;
3710
3711 // Check the unmerge source and destination element types match
3712 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3713 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3714 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3715 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3716 return false;
3717
3718 // Only generate legal instructions post-legalizer
3719 if (!IsPreLegalize) {
3720 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3721
3722 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3723 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3724 return false;
3725
3726 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3727 return false;
3728 }
3729
3730 return true;
3731}
3732
3733void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,
3734 Register &MatchInfo) const {
3735 Register MidReg;
3736 auto BuildMI = cast<GBuildVector>(&MI);
3737 Register DstReg = BuildMI->getReg(0);
3738 LLT DstTy = MRI.getType(DstReg);
3739 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3740 unsigned DstTyNumElt = DstTy.getNumElements();
3741 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3742
3743 // No need to pad vector if only G_TRUNC is needed
3744 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3745 MidReg = MatchInfo;
3746 } else {
3747 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3748 SmallVector<Register> ConcatRegs = {MatchInfo};
3749 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3750 ConcatRegs.push_back(UndefReg);
3751
3752 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3753 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3754 }
3755
3756 Builder.buildTrunc(DstReg, MidReg);
3757 MI.eraseFromParent();
3758}
3759
3760bool CombinerHelper::matchNotCmp(
3761 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3762 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3763 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3764 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3765 Register XorSrc;
3766 Register CstReg;
3767 // We match xor(src, true) here.
3768 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3769 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3770 return false;
3771
3772 if (!MRI.hasOneNonDBGUse(XorSrc))
3773 return false;
3774
3775 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3776 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3777 // work list of tree nodes to visit.
3778 RegsToNegate.push_back(XorSrc);
3779 // Remember whether the comparisons are all integer or all floating point.
3780 bool IsInt = false;
3781 bool IsFP = false;
3782 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3783 Register Reg = RegsToNegate[I];
3784 if (!MRI.hasOneNonDBGUse(Reg))
3785 return false;
3786 MachineInstr *Def = MRI.getVRegDef(Reg);
3787 switch (Def->getOpcode()) {
3788 default:
3789 // Don't match if the tree contains anything other than ANDs, ORs and
3790 // comparisons.
3791 return false;
3792 case TargetOpcode::G_ICMP:
3793 if (IsFP)
3794 return false;
3795 IsInt = true;
3796 // When we apply the combine we will invert the predicate.
3797 break;
3798 case TargetOpcode::G_FCMP:
3799 if (IsInt)
3800 return false;
3801 IsFP = true;
3802 // When we apply the combine we will invert the predicate.
3803 break;
3804 case TargetOpcode::G_AND:
3805 case TargetOpcode::G_OR:
3806 // Implement De Morgan's laws:
3807 // ~(x & y) -> ~x | ~y
3808 // ~(x | y) -> ~x & ~y
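// E.g. not ((a == b) & (c u< d)) becomes (a != b) | (c u>= d).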
3809 // When we apply the combine we will change the opcode and recursively
3810 // negate the operands.
3811 RegsToNegate.push_back(Def->getOperand(1).getReg());
3812 RegsToNegate.push_back(Def->getOperand(2).getReg());
3813 break;
3814 }
3815 }
3816
3817 // Now we know whether the comparisons are integer or floating point, check
3818 // the constant in the xor.
3819 int64_t Cst;
3820 if (Ty.isVector()) {
3821 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3822 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3823 if (!MaybeCst)
3824 return false;
3825 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3826 return false;
3827 } else {
3828 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3829 return false;
3830 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3831 return false;
3832 }
3833
3834 return true;
3835}
3836
3837void CombinerHelper::applyNotCmp(
3838 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3839 for (Register Reg : RegsToNegate) {
3840 MachineInstr *Def = MRI.getVRegDef(Reg);
3841 Observer.changingInstr(*Def);
3842 // For each comparison, invert the opcode. For each AND and OR, change the
3843 // opcode.
3844 switch (Def->getOpcode()) {
3845 default:
3846 llvm_unreachable("Unexpected opcode");
3847 case TargetOpcode::G_ICMP:
3848 case TargetOpcode::G_FCMP: {
3849 MachineOperand &PredOp = Def->getOperand(1);
3850 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3851 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
3852 PredOp.setPredicate(NewP);
3853 break;
3854 }
3855 case TargetOpcode::G_AND:
3856 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3857 break;
3858 case TargetOpcode::G_OR:
3859 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3860 break;
3861 }
3862 Observer.changedInstr(*Def);
3863 }
3864
3865 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3866 MI.eraseFromParent();
3867}
3868
3869bool CombinerHelper::matchXorOfAndWithSameReg(
3870 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3871 // Match (xor (and x, y), y) (or any of its commuted cases)
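// (x & y) ^ y == ~x & y: where a bit of y is 0 both sides are 0, and where
// a bit of y is 1 the xor inverts the corresponding bit of x.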
3872 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3873 Register &X = MatchInfo.first;
3874 Register &Y = MatchInfo.second;
3875 Register AndReg = MI.getOperand(1).getReg();
3876 Register SharedReg = MI.getOperand(2).getReg();
3877
3878 // Find a G_AND on either side of the G_XOR.
3879 // Look for one of
3880 //
3881 // (xor (and x, y), SharedReg)
3882 // (xor SharedReg, (and x, y))
3883 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3884 std::swap(AndReg, SharedReg);
3885 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3886 return false;
3887 }
3888
3889 // Only do this if we'll eliminate the G_AND.
3890 if (!MRI.hasOneNonDBGUse(AndReg))
3891 return false;
3892
3893 // We can combine if SharedReg is the same as either the LHS or RHS of the
3894 // G_AND.
3895 if (Y != SharedReg)
3896 std::swap(X, Y);
3897 return Y == SharedReg;
3898}
3899
3900void CombinerHelper::applyXorOfAndWithSameReg(
3901 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3902 // Fold (xor (and x, y), y) -> (and (not x), y)
3903 Register X, Y;
3904 std::tie(X, Y) = MatchInfo;
3905 auto Not = Builder.buildNot(MRI.getType(X), X);
3906 Observer.changingInstr(MI);
3907 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3908 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3909 MI.getOperand(2).setReg(Y);
3910 Observer.changedInstr(MI);
3911}
3912
3913bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) const {
3914 auto &PtrAdd = cast<GPtrAdd>(MI);
3915 Register DstReg = PtrAdd.getReg(0);
3916 LLT Ty = MRI.getType(DstReg);
3917 const DataLayout &DL = Builder.getMF().getDataLayout();
3918
3919 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3920 return false;
3921
3922 if (Ty.isPointer()) {
3923 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3924 return ConstVal && *ConstVal == 0;
3925 }
3926
3927 assert(Ty.isVector() && "Expecting a vector type");
3928 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3929 return isBuildVectorAllZeros(*VecMI, MRI);
3930}
3931
3932void CombinerHelper::applyPtrAddZero(MachineInstr &MI) const {
3933 auto &PtrAdd = cast<GPtrAdd>(MI);
3934 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3935 PtrAdd.eraseFromParent();
3936}
3937
3938/// The second source operand is known to be a power of 2.
3939void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) const {
3940 Register DstReg = MI.getOperand(0).getReg();
3941 Register Src0 = MI.getOperand(1).getReg();
3942 Register Pow2Src1 = MI.getOperand(2).getReg();
3943 LLT Ty = MRI.getType(DstReg);
3944
3945 // Fold (urem x, pow2) -> (and x, pow2-1)
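// E.g. (urem x, 8) -> (and x, 7): the remainder modulo a power of two is
// just the low bits.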
3946 auto NegOne = Builder.buildConstant(Ty, -1);
3947 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3948 Builder.buildAnd(DstReg, Src0, Add);
3949 MI.eraseFromParent();
3950}
3951
3952bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3953 unsigned &SelectOpNo) const {
3954 Register LHS = MI.getOperand(1).getReg();
3955 Register RHS = MI.getOperand(2).getReg();
3956
3957 Register OtherOperandReg = RHS;
3958 SelectOpNo = 1;
3959 MachineInstr *Select = MRI.getVRegDef(LHS);
3960
3961 // Don't do this unless the old select is going away. We want to eliminate the
3962 // binary operator, not replace a binop with a select.
3963 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3964 !MRI.hasOneNonDBGUse(LHS)) {
3965 OtherOperandReg = LHS;
3966 SelectOpNo = 2;
3967 Select = MRI.getVRegDef(RHS);
3968 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3969 !MRI.hasOneNonDBGUse(RHS))
3970 return false;
3971 }
3972
3973 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3974 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3975
3976 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3977 /*AllowFP*/ true,
3978 /*AllowOpaqueConstants*/ false))
3979 return false;
3980 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3981 /*AllowFP*/ true,
3982 /*AllowOpaqueConstants*/ false))
3983 return false;
3984
3985 unsigned BinOpcode = MI.getOpcode();
3986
3987 // We know that one of the operands is a select of constants. Now verify that
3988 // the other binary operator operand is either a constant, or we can handle a
3989 // variable.
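// E.g. (and (select c, 0, -1), x) can still be folded to (select c, 0, x)
// even though x is not a constant.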
3990 bool CanFoldNonConst =
3991 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3992 (isNullOrNullSplat(*SelectLHS, MRI) ||
3993 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3994 (isNullOrNullSplat(*SelectRHS, MRI) ||
3995 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3996 if (CanFoldNonConst)
3997 return true;
3998
3999 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
4000 /*AllowFP*/ true,
4001 /*AllowOpaqueConstants*/ false);
4002}
4003
4004/// \p SelectOperand is the operand in binary operator \p MI that is the select
4005/// to fold.
4006void CombinerHelper::applyFoldBinOpIntoSelect(
4007 MachineInstr &MI, const unsigned &SelectOperand) const {
4008 Register Dst = MI.getOperand(0).getReg();
4009 Register LHS = MI.getOperand(1).getReg();
4010 Register RHS = MI.getOperand(2).getReg();
4011 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
4012
4013 Register SelectCond = Select->getOperand(1).getReg();
4014 Register SelectTrue = Select->getOperand(2).getReg();
4015 Register SelectFalse = Select->getOperand(3).getReg();
4016
4017 LLT Ty = MRI.getType(Dst);
4018 unsigned BinOpcode = MI.getOpcode();
4019
4020 Register FoldTrue, FoldFalse;
4021
4022 // We have a select-of-constants followed by a binary operator with a
4023 // constant. Eliminate the binop by pulling the constant math into the select.
4024 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
4025 if (SelectOperand == 1) {
4026 // TODO: SelectionDAG verifies this actually constant folds before
4027 // committing to the combine.
4028
4029 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
4030 FoldFalse =
4031 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
4032 } else {
4033 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
4034 FoldFalse =
4035 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
4036 }
4037
4038 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
4039 MI.eraseFromParent();
4040}
4041
4042std::optional<SmallVector<Register, 8>>
4043CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
4044 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
4045 // We want to detect if Root is part of a tree which represents a bunch
4046 // of loads being merged into a larger load. We'll try to recognize patterns
4047 // like the following:
4048 //
4049 // Reg Reg
4050 // \ /
4051 // OR_1 Reg
4052 // \ /
4053 // OR_2
4054 // \ Reg
4055 // .. /
4056 // Root
4057 //
4058 // Reg Reg Reg Reg
4059 // \ / \ /
4060 // OR_1 OR_2
4061 // \ /
4062 // \ /
4063 // ...
4064 // Root
4065 //
4066 // Each "Reg" may have been produced by a load + some arithmetic. This
4067 // function will save each of them.
4068 SmallVector<Register, 8> RegsToVisit;
4069 SmallVector<const MachineInstr *, 8> Ors = {Root};
4070
4071 // In the "worst" case, we're dealing with a load for each byte. So, there
4072 // are at most #bytes - 1 ORs.
4073 const unsigned MaxIter =
4074 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
4075 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
4076 if (Ors.empty())
4077 break;
4078 const MachineInstr *Curr = Ors.pop_back_val();
4079 Register OrLHS = Curr->getOperand(1).getReg();
4080 Register OrRHS = Curr->getOperand(2).getReg();
4081
4082 // In the combine, we want to eliminate the entire tree.
4083 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
4084 return std::nullopt;
4085
4086 // If it's a G_OR, save it and continue to walk. If it's not, then it's
4087 // something that may be a load + arithmetic.
4088 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
4089 Ors.push_back(Or);
4090 else
4091 RegsToVisit.push_back(OrLHS);
4092 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
4093 Ors.push_back(Or);
4094 else
4095 RegsToVisit.push_back(OrRHS);
4096 }
4097
4098 // We're going to try to merge each register into a wider power-of-2 type,
4099 // so we ought to have an even number of registers.
4100 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4101 return std::nullopt;
4102 return RegsToVisit;
4103}
4104
4105/// Helper function for findLoadOffsetsForLoadOrCombine.
4106///
4107/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4108/// and then moving that value into a specific byte offset.
4109///
4110/// e.g. x[i] << 24
4111///
4112/// \returns The load instruction and the byte offset it is moved into.
4113static std::optional<std::pair<GZExtLoad *, int64_t>>
4114matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4115 const MachineRegisterInfo &MRI) {
4116 assert(MRI.hasOneNonDBGUse(Reg) &&
4117 "Expected Reg to only have one non-debug use?");
4118 Register MaybeLoad;
4119 int64_t Shift;
4120 if (!mi_match(Reg, MRI,
4121 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4122 Shift = 0;
4123 MaybeLoad = Reg;
4124 }
4125
4126 if (Shift % MemSizeInBits != 0)
4127 return std::nullopt;
4128
4129 // TODO: Handle other types of loads.
4130 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4131 if (!Load)
4132 return std::nullopt;
4133
4134 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4135 return std::nullopt;
4136
4137 return std::make_pair(Load, Shift / MemSizeInBits);
4138}
4139
4140std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4141CombinerHelper::findLoadOffsetsForLoadOrCombine(
4142 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
4143 const SmallVector<Register, 8> &RegsToVisit,
4144 const unsigned MemSizeInBits) const {
4145
4146 // Each load found for the pattern. There should be one for each RegsToVisit.
4147 SmallSetVector<const MachineInstr *, 8> Loads;
4148
4149 // The lowest index used in any load. (The lowest "i" for each x[i].)
4150 int64_t LowestIdx = INT64_MAX;
4151
4152 // The load which uses the lowest index.
4153 GZExtLoad *LowestIdxLoad = nullptr;
4154
4155 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4156 SmallSet<int64_t, 8> SeenIdx;
4157
4158 // Ensure each load is in the same MBB.
4159 // TODO: Support multiple MachineBasicBlocks.
4160 MachineBasicBlock *MBB = nullptr;
4161 const MachineMemOperand *MMO = nullptr;
4162
4163 // Earliest instruction-order load in the pattern.
4164 GZExtLoad *EarliestLoad = nullptr;
4165
4166 // Latest instruction-order load in the pattern.
4167 GZExtLoad *LatestLoad = nullptr;
4168
4169 // Base pointer which every load should share.
4170 Register BasePtr;
4171
4172 // We want to find a load for each register. Each load should have some
4173 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4174 // track of the load which uses the lowest index. Later, we will check if we
4175 // can use its pointer in the final, combined load.
4176 for (auto Reg : RegsToVisit) {
4177 // Find the load, and find the position that it will end up in (e.g. a
4178 // shifted value).
4179 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4180 if (!LoadAndPos)
4181 return std::nullopt;
4182 GZExtLoad *Load;
4183 int64_t DstPos;
4184 std::tie(Load, DstPos) = *LoadAndPos;
4185
4186 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4187 // it is difficult to check for stores/calls/etc between loads.
4188 MachineBasicBlock *LoadMBB = Load->getParent();
4189 if (!MBB)
4190 MBB = LoadMBB;
4191 if (LoadMBB != MBB)
4192 return std::nullopt;
4193
4194 // Make sure that the MachineMemOperands of every seen load are compatible.
4195 auto &LoadMMO = Load->getMMO();
4196 if (!MMO)
4197 MMO = &LoadMMO;
4198 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4199 return std::nullopt;
4200
4201 // Find out what the base pointer and index for the load is.
4202 Register LoadPtr;
4203 int64_t Idx;
4204 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4205 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4206 LoadPtr = Load->getOperand(1).getReg();
4207 Idx = 0;
4208 }
4209
4210 // Don't combine things like a[i], a[i] -> a bigger load.
4211 if (!SeenIdx.insert(Idx).second)
4212 return std::nullopt;
4213
4214 // Every load must share the same base pointer; don't combine things like:
4215 //
4216 // a[i], b[i + 1] -> a bigger load.
4217 if (!BasePtr.isValid())
4218 BasePtr = LoadPtr;
4219 if (BasePtr != LoadPtr)
4220 return std::nullopt;
4221
4222 if (Idx < LowestIdx) {
4223 LowestIdx = Idx;
4224 LowestIdxLoad = Load;
4225 }
4226
4227 // Keep track of the byte offset that this load ends up at. If we have seen
4228 // the byte offset, then stop here. We do not want to combine:
4229 //
4230 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4231 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4232 return std::nullopt;
4233 Loads.insert(Load);
4234
4235 // Keep track of the position of the earliest/latest loads in the pattern.
4236 // We will check that there are no load fold barriers between them later
4237 // on.
4238 //
4239 // FIXME: Is there a better way to check for load fold barriers?
4240 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4241 EarliestLoad = Load;
4242 if (!LatestLoad || dominates(*LatestLoad, *Load))
4243 LatestLoad = Load;
4244 }
4245
4246 // We found a load for each register. Let's check if each load satisfies the
4247 // pattern.
4248 assert(Loads.size() == RegsToVisit.size() &&
4249 "Expected to find a load for each register?");
4250 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4251 LatestLoad && "Expected at least two loads?");
4252
4253 // Check if there are any stores, calls, etc. between any of the loads. If
4254 // there are, then we can't safely perform the combine.
4255 //
4256 // MaxIter is chosen based on the (worst-case) number of iterations it
4257 // typically takes to succeed in the LLVM test suite plus some padding.
4258 //
4259 // FIXME: Is there a better way to check for load fold barriers?
4260 const unsigned MaxIter = 20;
4261 unsigned Iter = 0;
4262 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4263 LatestLoad->getIterator())) {
4264 if (Loads.count(&MI))
4265 continue;
4266 if (MI.isLoadFoldBarrier())
4267 return std::nullopt;
4268 if (Iter++ == MaxIter)
4269 return std::nullopt;
4270 }
4271
4272 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4273}
4274
4275bool CombinerHelper::matchLoadOrCombine(
4276 MachineInstr &MI,
4277 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4278 assert(MI.getOpcode() == TargetOpcode::G_OR);
4279 MachineFunction &MF = *MI.getMF();
4280 // Assuming a little-endian target, transform:
4281 // s8 *a = ...
4282 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4283 // =>
4284 // s32 val = *((i32)a)
4285 //
4286 // s8 *a = ...
4287 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4288 // =>
4289 // s32 val = BSWAP(*((s32)a))
4290 Register Dst = MI.getOperand(0).getReg();
4291 LLT Ty = MRI.getType(Dst);
4292 if (Ty.isVector())
4293 return false;
4294
4295 // We need to combine at least two loads into this type. Since the smallest
4296 // possible load is into a byte, we need at least a 16-bit wide type.
4297 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4298 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4299 return false;
4300
4301 // Match a collection of non-OR instructions in the pattern.
4302 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4303 if (!RegsToVisit)
4304 return false;
4305
4306 // We have a collection of non-OR instructions. Figure out how wide each of
4307 // the small loads should be based on the number of potential loads we
4308 // found.
4309 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4310 if (NarrowMemSizeInBits % 8 != 0)
4311 return false;
4312
4313 // Check if each register feeding into each OR is a load from the same
4314 // base pointer + some arithmetic.
4315 //
4316 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4317 //
4318 // Also verify that each of these ends up putting a[i] into the same memory
4319 // offset as a load into a wide type would.
4320 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
4321 GZExtLoad *LowestIdxLoad, *LatestLoad;
4322 int64_t LowestIdx;
4323 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4324 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4325 if (!MaybeLoadInfo)
4326 return false;
4327 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4328
4329 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4330 // we found before, check if this corresponds to a big or little endian byte
4331 // pattern. If it does, then we can represent it using a load + possibly a
4332 // BSWAP.
4333 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4334 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4335 if (!IsBigEndian)
4336 return false;
4337 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4338 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4339 return false;
4340
4341 // Make sure that the load from the lowest index produces offset 0 in the
4342 // final value.
4343 //
4344 // This ensures that we won't combine something like this:
4345 //
4346 // load x[i] -> byte 2
4347 // load x[i+1] -> byte 0 ---> wide_load x[i]
4348 // load x[i+2] -> byte 1
4349 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4350 const unsigned ZeroByteOffset =
4351 *IsBigEndian
4352 ? bigEndianByteAt(NumLoadsInTy, 0)
4353 : littleEndianByteAt(NumLoadsInTy, 0);
4354 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4355 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4356 ZeroOffsetIdx->second != LowestIdx)
4357 return false;
4358
4359 // We will reuse the pointer from the load which ends up at byte offset 0. It
4360 // may not use index 0.
4361 Register Ptr = LowestIdxLoad->getPointerReg();
4362 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4363 LegalityQuery::MemDesc MMDesc(MMO);
4364 MMDesc.MemoryTy = Ty;
4365 if (!isLegalOrBeforeLegalizer(
4366 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4367 return false;
4368 auto PtrInfo = MMO.getPointerInfo();
4369 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4370
4371 // Load must be allowed and fast on the target.
4372 LLVMContext &C = MF.getFunction().getContext();
4373 auto &DL = MF.getDataLayout();
4374 unsigned Fast = 0;
4375 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4376 !Fast)
4377 return false;
4378
4379 MatchInfo = [=](MachineIRBuilder &MIB) {
4380 MIB.setInstrAndDebugLoc(*LatestLoad);
4381 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4382 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4383 if (NeedsBSwap)
4384 MIB.buildBSwap(Dst, LoadDst);
4385 };
4386 return true;
4387}
4388
4389bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
4390 MachineInstr *&ExtMI) const {
4391 auto &PHI = cast<GPhi>(MI);
4392 Register DstReg = PHI.getReg(0);
4393
4394 // TODO: Extending a vector may be expensive, don't do this until heuristics
4395 // are better.
4396 if (MRI.getType(DstReg).isVector())
4397 return false;
4398
4399 // Try to match a phi, whose only use is an extend.
4400 if (!MRI.hasOneNonDBGUse(DstReg))
4401 return false;
4402 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4403 switch (ExtMI->getOpcode()) {
4404 case TargetOpcode::G_ANYEXT:
4405 return true; // G_ANYEXT is usually free.
4406 case TargetOpcode::G_ZEXT:
4407 case TargetOpcode::G_SEXT:
4408 break;
4409 default:
4410 return false;
4411 }
4412
4413 // If the target is likely to fold this extend away, don't propagate.
4414 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4415 return false;
4416
4417 // We don't want to propagate the extends unless there's a good chance that
4418 // they'll be optimized in some way.
4419 // Collect the unique incoming values.
4420 SmallPtrSet<MachineInstr *, 4> InSrcs;
4421 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4422 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4423 switch (DefMI->getOpcode()) {
4424 case TargetOpcode::G_LOAD:
4425 case TargetOpcode::G_TRUNC:
4426 case TargetOpcode::G_SEXT:
4427 case TargetOpcode::G_ZEXT:
4428 case TargetOpcode::G_ANYEXT:
4429 case TargetOpcode::G_CONSTANT:
4430 InSrcs.insert(DefMI);
4431 // Don't try to propagate if there are too many places to create new
4432 // extends, chances are it'll increase code size.
4433 if (InSrcs.size() > 2)
4434 return false;
4435 break;
4436 default:
4437 return false;
4438 }
4439 }
4440 return true;
4441}
4442
4443void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
4444 MachineInstr *&ExtMI) const {
4445 auto &PHI = cast<GPhi>(MI);
4446 Register DstReg = ExtMI->getOperand(0).getReg();
4447 LLT ExtTy = MRI.getType(DstReg);
4448
4449 // Propagate the extension into the block where each incoming value is defined.
4450 // Use a SetVector here because PHIs can have duplicate edges, and we want
4451 // deterministic iteration order.
4452 SmallSetVector<MachineInstr *, 8> SrcMIs;
4453 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
4454 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4455 auto SrcReg = PHI.getIncomingValue(I);
4456 auto *SrcMI = MRI.getVRegDef(SrcReg);
4457 if (!SrcMIs.insert(SrcMI))
4458 continue;
4459
4460 // Build an extend after each src inst.
4461 auto *MBB = SrcMI->getParent();
4462 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4463 if (InsertPt != MBB->end() && InsertPt->isPHI())
4464 InsertPt = MBB->getFirstNonPHI();
4465
4466 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4467 Builder.setDebugLoc(MI.getDebugLoc());
4468 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4469 OldToNewSrcMap[SrcMI] = NewExt;
4470 }
4471
4472 // Create a new phi with the extended inputs.
4473 Builder.setInstrAndDebugLoc(MI);
4474 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4475 NewPhi.addDef(DstReg);
4476 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4477 if (!MO.isReg()) {
4478 NewPhi.addMBB(MO.getMBB());
4479 continue;
4480 }
4481 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4482 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4483 }
4484 Builder.insertInstr(NewPhi);
4485 ExtMI->eraseFromParent();
4486}
4487
4488bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
4489 Register &Reg) const {
4490 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4491 // If we have a constant index, look for a G_BUILD_VECTOR source
4492 // and find the source register that the index maps to.
4493 Register SrcVec = MI.getOperand(1).getReg();
4494 LLT SrcTy = MRI.getType(SrcVec);
4495 if (SrcTy.isScalableVector())
4496 return false;
4497
4498 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4499 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4500 return false;
4501
4502 unsigned VecIdx = Cst->Value.getZExtValue();
4503
4504 // Check if we have a build_vector or build_vector_trunc with an optional
4505 // trunc in front.
4506 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4507 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4508 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4509 }
4510
4511 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4512 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4513 return false;
4514
4515 EVT Ty(getMVTForLLT(SrcTy));
4516 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4517 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4518 return false;
4519
4520 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4521 return true;
4522}
4523
4524void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
4525 Register &Reg) const {
4526 // Check the type of the register, since it may have come from a
4527 // G_BUILD_VECTOR_TRUNC.
4528 LLT ScalarTy = MRI.getType(Reg);
4529 Register DstReg = MI.getOperand(0).getReg();
4530 LLT DstTy = MRI.getType(DstReg);
4531
4532 if (ScalarTy != DstTy) {
4533 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4534 Builder.buildTrunc(DstReg, Reg);
4535 MI.eraseFromParent();
4536 return;
4537 }
4538 replaceSingleDefInstWithReg(MI, Reg);
4539}
4540
4541bool CombinerHelper::matchExtractAllEltsFromBuildVector(
4542 MachineInstr &MI,
4543 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4544 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4545 // This combine tries to find build_vector's which have every source element
4546 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4547 // the masked load scalarization is run late in the pipeline. There's already
4548 // a combine for a similar pattern starting from the extract, but that
4549 // doesn't attempt to do it if there are multiple uses of the build_vector,
4550 // which in this case is true. Starting the combine from the build_vector
4551 // feels more natural than trying to find sibling nodes of extracts.
4552 // E.g.
4553 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4554 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4555 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4556 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4557 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4558 // ==>
4559 // replace ext{1,2,3,4} with %s{1,2,3,4}
4560
4561 Register DstReg = MI.getOperand(0).getReg();
4562 LLT DstTy = MRI.getType(DstReg);
4563 unsigned NumElts = DstTy.getNumElements();
4564
4565 SmallBitVector ExtractedElts(NumElts);
4566 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4567 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4568 return false;
4569 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4570 if (!Cst)
4571 return false;
4572 unsigned Idx = Cst->getZExtValue();
4573 if (Idx >= NumElts)
4574 return false; // Out of range.
4575 ExtractedElts.set(Idx);
4576 SrcDstPairs.emplace_back(
4577 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4578 }
4579 // Match if every element was extracted.
4580 return ExtractedElts.all();
4581}
4582
4583void CombinerHelper::applyExtractAllEltsFromBuildVector(
4584 MachineInstr &MI,
4585 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4586 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4587 for (auto &Pair : SrcDstPairs) {
4588 auto *ExtMI = Pair.second;
4589 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4590 ExtMI->eraseFromParent();
4591 }
4592 MI.eraseFromParent();
4593}
4594
4595void CombinerHelper::applyBuildFn(
4596 MachineInstr &MI,
4597 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4598 applyBuildFnNoErase(MI, MatchInfo);
4599 MI.eraseFromParent();
4600}
4601
4602void CombinerHelper::applyBuildFnNoErase(
4603 MachineInstr &MI,
4604 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4605 MatchInfo(Builder);
4606}
4607
4608bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
4609 bool AllowScalarConstants,
4610 BuildFnTy &MatchInfo) const {
4611 assert(MI.getOpcode() == TargetOpcode::G_OR);
4612
4613 Register Dst = MI.getOperand(0).getReg();
4614 LLT Ty = MRI.getType(Dst);
4615 unsigned BitWidth = Ty.getScalarSizeInBits();
4616
4617 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4618 unsigned FshOpc = 0;
4619
4620 // Match (or (shl ...), (lshr ...)).
4621 if (!mi_match(Dst, MRI,
4622 // m_GOr() handles the commuted version as well.
4623 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4624 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4625 return false;
4626
4627 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4628 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
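// E.g. on s32, (or (shl x, 24), (lshr y, 8)) -> (fshr x, y, 8): shifting
// the 64-bit concatenation x:y right by 8 yields (x << 24) | (y >> 8).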
4629 int64_t CstShlAmt = 0, CstLShrAmt;
4630 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4631 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4632 CstShlAmt + CstLShrAmt == BitWidth) {
4633 FshOpc = TargetOpcode::G_FSHR;
4634 Amt = LShrAmt;
4635 } else if (mi_match(LShrAmt, MRI,
4636 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4637 ShlAmt == Amt) {
4638 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4639 FshOpc = TargetOpcode::G_FSHL;
4640 } else if (mi_match(ShlAmt, MRI,
4641 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4642 LShrAmt == Amt) {
4643 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4644 FshOpc = TargetOpcode::G_FSHR;
4645 } else {
4646 return false;
4647 }
4648
4649 LLT AmtTy = MRI.getType(Amt);
4650 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4651 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4652 return false;
4653
4654 MatchInfo = [=](MachineIRBuilder &B) {
4655 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4656 };
4657 return true;
4658}
4659
4660/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4661bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) const {
4662 unsigned Opc = MI.getOpcode();
4663 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4664 Register X = MI.getOperand(1).getReg();
4665 Register Y = MI.getOperand(2).getReg();
4666 if (X != Y)
4667 return false;
4668 unsigned RotateOpc =
4669 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4670 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4671}
4672
4673void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) const {
4674 unsigned Opc = MI.getOpcode();
4675 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4676 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4677 Observer.changingInstr(MI);
4678 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4679 : TargetOpcode::G_ROTR));
4680 MI.removeOperand(2);
4681 Observer.changedInstr(MI);
4682}
4683
4684// Fold (rot x, c) -> (rot x, c % BitSize)
4685bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) const {
4686 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4687 MI.getOpcode() == TargetOpcode::G_ROTR);
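// E.g. on s32, (rotl x, 40) is equivalent to (rotl x, 8) because rotates
// are periodic in the bit width.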
4688 unsigned Bitsize =
4689 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4690 Register AmtReg = MI.getOperand(2).getReg();
4691 bool OutOfRange = false;
4692 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4693 if (auto *CI = dyn_cast<ConstantInt>(C))
4694 OutOfRange |= CI->getValue().uge(Bitsize);
4695 return true;
4696 };
4697 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4698}
4699
4700void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) const {
4701 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4702 MI.getOpcode() == TargetOpcode::G_ROTR);
4703 unsigned Bitsize =
4704 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4705 Register Amt = MI.getOperand(2).getReg();
4706 LLT AmtTy = MRI.getType(Amt);
4707 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4708 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4709 Observer.changingInstr(MI);
4710 MI.getOperand(2).setReg(Amt);
4711 Observer.changedInstr(MI);
4712}
4713
4714bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4715 int64_t &MatchInfo) const {
4716 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4717 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4718
4719 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4720 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4721 // KnownBits on the LHS in two cases:
4722 //
4723 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4724 // we cannot do any transforms so we can safely bail out early.
4725 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4726 // >=0.
4727 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4728 if (KnownRHS.isUnknown())
4729 return false;
4730
4731 std::optional<bool> KnownVal;
4732 if (KnownRHS.isZero()) {
4733 // ? uge 0 -> always true
4734 // ? ult 0 -> always false
4735 if (Pred == CmpInst::ICMP_UGE)
4736 KnownVal = true;
4737 else if (Pred == CmpInst::ICMP_ULT)
4738 KnownVal = false;
4739 }
4740
4741 if (!KnownVal) {
4742 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4743 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4744 }
4745
4746 if (!KnownVal)
4747 return false;
4748 MatchInfo =
4749 *KnownVal
4750 ? getICmpTrueVal(getTargetLowering(),
4751 /*IsVector = */
4752 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4753 /* IsFP = */ false)
4754 : 0;
4755 return true;
4756}
4757
4758bool CombinerHelper::matchICmpToLHSKnownBits(
4759 MachineInstr &MI,
4760 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4761 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4762 // Given:
4763 //
4764 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4765 // %cmp = G_ICMP ne %x, 0
4766 //
4767 // Or:
4768 //
4769 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4770 // %cmp = G_ICMP eq %x, 1
4771 //
4772 // We can replace %cmp with %x assuming true is 1 on the target.
4773 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4774 if (!CmpInst::isEquality(Pred))
4775 return false;
4776 Register Dst = MI.getOperand(0).getReg();
4777 LLT DstTy = MRI.getType(Dst);
4778 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4779 /* IsFP = */ false) != 1)
4780 return false;
4781 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4782 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4783 return false;
4784 Register LHS = MI.getOperand(2).getReg();
4785 auto KnownLHS = VT->getKnownBits(LHS);
4786 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4787 return false;
4788 // Make sure replacing Dst with the LHS is a legal operation.
4789 LLT LHSTy = MRI.getType(LHS);
4790 unsigned LHSSize = LHSTy.getSizeInBits();
4791 unsigned DstSize = DstTy.getSizeInBits();
4792 unsigned Op = TargetOpcode::COPY;
4793 if (DstSize != LHSSize)
4794 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4795 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4796 return false;
4797 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4798 return true;
4799}
4800
4801// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4802bool CombinerHelper::matchAndOrDisjointMask(
4803 MachineInstr &MI,
4804 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4805 assert(MI.getOpcode() == TargetOpcode::G_AND);
4806
4807 // Ignore vector types to simplify matching the two constants.
4808 // TODO: do this for vectors and scalars via a demanded bits analysis.
4809 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4810 if (Ty.isVector())
4811 return false;
4812
4813 Register Src;
4814 Register AndMaskReg;
4815 int64_t AndMaskBits;
4816 int64_t OrMaskBits;
4817 if (!mi_match(MI, MRI,
4818 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4819 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4820 return false;
4821
4822 // Check if OrMask could turn on any bits in Src.
4823 if (AndMaskBits & OrMaskBits)
4824 return false;
4825
4826 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4827 Observer.changingInstr(MI);
4828 // Canonicalize the result to have the constant on the RHS.
4829 if (MI.getOperand(1).getReg() == AndMaskReg)
4830 MI.getOperand(2).setReg(AndMaskReg);
4831 MI.getOperand(1).setReg(Src);
4832 Observer.changedInstr(MI);
4833 };
4834 return true;
4835}
4836
4837/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4838bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4839 MachineInstr &MI,
4840 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4841 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4842 Register Dst = MI.getOperand(0).getReg();
4843 Register Src = MI.getOperand(1).getReg();
4844 LLT Ty = MRI.getType(Src);
4845 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4846 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4847 return false;
4848 int64_t Width = MI.getOperand(2).getImm();
4849 Register ShiftSrc;
4850 int64_t ShiftImm;
4851 if (!mi_match(
4852 Src, MRI,
4853 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4854 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4855 return false;
4856 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4857 return false;
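// E.g. (sext_inreg (lshr x, 4), 8) -> (sbfx x, 4, 8): extract 8 bits
// starting at bit 4 and sign-extend the result.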
4858
4859 MatchInfo = [=](MachineIRBuilder &B) {
4860 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4861 auto Cst2 = B.buildConstant(ExtractTy, Width);
4862 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4863 };
4864 return true;
4865}
4866
4867/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4868bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
4869 BuildFnTy &MatchInfo) const {
4870 GAnd *And = cast<GAnd>(&MI);
4871 Register Dst = And->getReg(0);
4872 LLT Ty = MRI.getType(Dst);
4873 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4874 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4875 // into account.
4876 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4877 return false;
4878
4879 int64_t AndImm, LSBImm;
4880 Register ShiftSrc;
4881 const unsigned Size = Ty.getScalarSizeInBits();
4882 if (!mi_match(And->getReg(0), MRI,
4883 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4884 m_ICst(AndImm))))
4885 return false;
4886
4887 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
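// E.g. 0x0F & 0x10 == 0, so 0x0F qualifies, whereas 0x0C & 0x0D != 0.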
4888 auto MaybeMask = static_cast<uint64_t>(AndImm);
4889 if (MaybeMask & (MaybeMask + 1))
4890 return false;
4891
4892 // LSB must fit within the register.
4893 if (static_cast<uint64_t>(LSBImm) >= Size)
4894 return false;
4895
4896 uint64_t Width = APInt(Size, AndImm).countr_one();
4897 MatchInfo = [=](MachineIRBuilder &B) {
4898 auto WidthCst = B.buildConstant(ExtractTy, Width);
4899 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4900 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4901 };
4902 return true;
4903}
4904
4905bool CombinerHelper::matchBitfieldExtractFromShr(
4906 MachineInstr &MI,
4907 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4908 const unsigned Opcode = MI.getOpcode();
4909 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4910
4911 const Register Dst = MI.getOperand(0).getReg();
4912
4913 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4914 ? TargetOpcode::G_SBFX
4915 : TargetOpcode::G_UBFX;
4916
4917 // Check if the type we would use for the extract is legal
4918 LLT Ty = MRI.getType(Dst);
4919 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4920 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4921 return false;
4922
4923 Register ShlSrc;
4924 int64_t ShrAmt;
4925 int64_t ShlAmt;
4926 const unsigned Size = Ty.getScalarSizeInBits();
4927
4928 // Try to match shr (shl x, c1), c2
4929 if (!mi_match(Dst, MRI,
4930 m_BinOp(Opcode,
4931 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4932 m_ICst(ShrAmt))))
4933 return false;
4934
4935 // Make sure that the shift sizes can fit a bitfield extract
4936 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4937 return false;
4938
4939 // Skip this combine if the G_SEXT_INREG combine could handle it
4940 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4941 return false;
4942
4943 // Calculate start position and width of the extract
4944 const int64_t Pos = ShrAmt - ShlAmt;
4945 const int64_t Width = Size - ShrAmt;
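// E.g. on s32, (lshr (shl x, 4), 8) -> (ubfx x, 4, 24), i.e. extract bits
// [4, 28) of x: Pos = 8 - 4 = 4 and Width = 32 - 8 = 24.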
4946
4947 MatchInfo = [=](MachineIRBuilder &B) {
4948 auto WidthCst = B.buildConstant(ExtractTy, Width);
4949 auto PosCst = B.buildConstant(ExtractTy, Pos);
4950 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4951 };
4952 return true;
4953}
4954
4955bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4956 MachineInstr &MI,
4957 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4958 const unsigned Opcode = MI.getOpcode();
4959 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4960
4961 const Register Dst = MI.getOperand(0).getReg();
4962 LLT Ty = MRI.getType(Dst);
4963 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4964 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4965 return false;
4966
4967 // Try to match shr (and x, c1), c2
4968 Register AndSrc;
4969 int64_t ShrAmt;
4970 int64_t SMask;
4971 if (!mi_match(Dst, MRI,
4972 m_BinOp(Opcode,
4973 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4974 m_ICst(ShrAmt))))
4975 return false;
4976
4977 const unsigned Size = Ty.getScalarSizeInBits();
4978 if (ShrAmt < 0 || ShrAmt >= Size)
4979 return false;
4980
4981 // If the shift subsumes the mask, emit the 0 directly.
4982 if (0 == (SMask >> ShrAmt)) {
4983 MatchInfo = [=](MachineIRBuilder &B) {
4984 B.buildConstant(Dst, 0);
4985 };
4986 return true;
4987 }
4988
4989 // Check that ubfx can do the extraction, with no holes in the mask.
4990 uint64_t UMask = SMask;
4991 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4992 UMask &= maskTrailingOnes<uint64_t>(Size);
4993 if (!isMask_64(UMask))
4994 return false;
4995
4996 // Calculate start position and width of the extract.
4997 const int64_t Pos = ShrAmt;
4998 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
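// E.g. (lshr (and x, 0xFF0), 4) -> (ubfx x, 4, 8): Pos = 4 and
// Width = countr_one(0xFFF) - 4 = 8.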
4999
5000 // It's preferable to keep the shift, rather than form G_SBFX.
5001 // TODO: remove the G_AND via demanded bits analysis.
5002 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
5003 return false;
5004
5005 MatchInfo = [=](MachineIRBuilder &B) {
5006 auto WidthCst = B.buildConstant(ExtractTy, Width);
5007 auto PosCst = B.buildConstant(ExtractTy, Pos);
5008 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
5009 };
5010 return true;
5011}
5012
5013bool CombinerHelper::reassociationCanBreakAddressingModePattern(
5014 MachineInstr &MI) const {
5015 auto &PtrAdd = cast<GPtrAdd>(MI);
5016
5017 Register Src1Reg = PtrAdd.getBaseReg();
5018 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
5019 if (!Src1Def)
5020 return false;
5021
5022 Register Src2Reg = PtrAdd.getOffsetReg();
5023
5024 if (MRI.hasOneNonDBGUse(Src1Reg))
5025 return false;
5026
5027 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
5028 if (!C1)
5029 return false;
5030 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5031 if (!C2)
5032 return false;
5033
5034 const APInt &C1APIntVal = *C1;
5035 const APInt &C2APIntVal = *C2;
5036 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
5037
5038 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
5039 // This combine may end up running before ptrtoint/inttoptr combines
5040 // manage to eliminate redundant conversions, so try to look through them.
5041 MachineInstr *ConvUseMI = &UseMI;
5042 unsigned ConvUseOpc = ConvUseMI->getOpcode();
5043 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
5044 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
5045 Register DefReg = ConvUseMI->getOperand(0).getReg();
5046 if (!MRI.hasOneNonDBGUse(DefReg))
5047 break;
5048 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
5049 ConvUseOpc = ConvUseMI->getOpcode();
5050 }
5051 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
5052 if (!LdStMI)
5053 continue;
5054 // Is x[offset2] already not a legal addressing mode? If so then
5055 // reassociating the constants breaks nothing (we test offset2 because
5056 // that's the one we hope to fold into the load or store).
5057 TargetLoweringBase::AddrMode AM;
5058 AM.HasBaseReg = true;
5059 AM.BaseOffs = C2APIntVal.getSExtValue();
5060 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
5061 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
5062 PtrAdd.getMF()->getFunction().getContext());
5063 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
5064 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5065 AccessTy, AS))
5066 continue;
5067
5068 // Would x[offset1+offset2] still be a legal addressing mode?
5069 AM.BaseOffs = CombinedValue;
5070 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5071 AccessTy, AS))
5072 return true;
5073 }
5074
5075 return false;
5076}
5077
5078bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
5079 MachineInstr *RHS,
5080 BuildFnTy &MatchInfo) const {
5081 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5082 Register Src1Reg = MI.getOperand(1).getReg();
5083 if (RHS->getOpcode() != TargetOpcode::G_ADD)
5084 return false;
5085 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
5086 if (!C2)
5087 return false;
5088
5089 // If both additions are nuw, the reassociated additions are also nuw.
5090 // If the original G_PTR_ADD is additionally nusw, X and C are both not
5091 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
5092 // therefore also nusw.
5093 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
5094 // the new G_PTR_ADDs are then also inbounds.
5095 unsigned PtrAddFlags = MI.getFlags();
5096 unsigned AddFlags = RHS->getFlags();
5097 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5098 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5099 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5100 unsigned Flags = 0;
5101 if (IsNoUWrap)
5102 Flags |= MachineInstr::MIFlag::NoUWrap;
5103 if (IsNoUSWrap)
5104 Flags |= MachineInstr::MIFlag::NoUSWrap;
5105 if (IsInBounds)
5106 Flags |= MachineInstr::MIFlag::InBounds;
5107
5108 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5109 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5110
5111 auto NewBase =
5112 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5113 Observer.changingInstr(MI);
5114 MI.getOperand(1).setReg(NewBase.getReg(0));
5115 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5116 MI.setFlags(Flags);
5117 Observer.changedInstr(MI);
5118 };
5119 return !reassociationCanBreakAddressingModePattern(MI);
5120}
5121
5122bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
5123 MachineInstr *LHS,
5124 MachineInstr *RHS,
5125 BuildFnTy &MatchInfo) const {
5126 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
5127 // if and only if (G_PTR_ADD X, C) has one use.
5128 Register LHSBase;
5129 std::optional<ValueAndVReg> LHSCstOff;
5130 if (!mi_match(MI.getBaseReg(), MRI,
5131 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5132 return false;
5133
5134 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5135
5136 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5137 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5138 // so the new G_PTR_ADDs are also inbounds.
5139 unsigned PtrAddFlags = MI.getFlags();
5140 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5141 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5142 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5143 MachineInstr::MIFlag::NoUSWrap);
5144 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5145 MachineInstr::MIFlag::InBounds);
5146 unsigned Flags = 0;
5147 if (IsNoUWrap)
5148 Flags |= MachineInstr::MIFlag::NoUWrap;
5149 if (IsNoUSWrap)
5150 Flags |= MachineInstr::MIFlag::NoUSWrap;
5151 if (IsInBounds)
5152 Flags |= MachineInstr::MIFlag::InBounds;
5153
5154 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5155 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5156 // before its def. Sink the instruction before the outer PTR_ADD to ensure
5157 // this doesn't happen.
5158 LHSPtrAdd->moveBefore(&MI);
5159 Register RHSReg = MI.getOffsetReg();
5160 // Setting the vreg directly would cause a type mismatch if it comes from an extend/trunc.
5161 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5162 Observer.changingInstr(MI);
5163 MI.getOperand(2).setReg(NewCst.getReg(0));
5164 MI.setFlags(Flags);
5165 Observer.changedInstr(MI);
5166 Observer.changingInstr(*LHSPtrAdd);
5167 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5168 LHSPtrAdd->setFlags(Flags);
5169 Observer.changedInstr(*LHSPtrAdd);
5170 };
5171 return !reassociationCanBreakAddressingModePattern(MI);
5172}
5173
5174bool CombinerHelper::matchReassocFoldConstantsInSubTree(
5175 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5176 BuildFnTy &MatchInfo) const {
5177 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5178 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5179 if (!LHSPtrAdd)
5180 return false;
5181
5182 Register Src2Reg = MI.getOperand(2).getReg();
5183 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5184 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5185 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5186 if (!C1)
5187 return false;
5188 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5189 if (!C2)
5190 return false;
5191
5192 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5193 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5194 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5195 // largest signed integer that fits into the index type, which is the maximum
5196 // size of allocated objects according to the IR Language Reference.
5197 unsigned PtrAddFlags = MI.getFlags();
5198 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5199 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5200 bool IsInBounds =
5201 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5202 unsigned Flags = 0;
5203 if (IsNoUWrap)
5204 Flags |= MachineInstr::MIFlag::NoUWrap;
5205 if (IsInBounds) {
5206 Flags |= MachineInstr::MIFlag::NoUSWrap;
5207 Flags |= MachineInstr::MIFlag::InBounds;
5208 }
5209
5210 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5211 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5212 Observer.changingInstr(MI);
5213 MI.getOperand(1).setReg(LHSSrc1);
5214 MI.getOperand(2).setReg(NewCst.getReg(0));
5215 MI.setFlags(Flags);
5216 Observer.changedInstr(MI);
5217 };
5218 return !reassociationCanBreakAddressingModePattern(MI);
5219}
5220
5221bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
5222 BuildFnTy &MatchInfo) const {
5223 auto &PtrAdd = cast<GPtrAdd>(MI);
5224 // We're trying to match a few pointer computation patterns here for
5225 // re-association opportunities.
5226 // 1) Isolating a constant operand to be on the RHS, e.g.:
5227 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5228 //
5229 // 2) Folding two constants in each sub-tree as long as such folding
5230 // doesn't break a legal addressing mode.
5231 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5232 //
5233 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5234 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
5235 // iff (G_PTR_ADD X, C) has one use.
5236 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5237 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5238
5239 // Try to match example 2.
5240 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5241 return true;
5242
5243 // Try to match example 3.
5244 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5245 return true;
5246
5247 // Try to match example 1.
5248 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5249 return true;
5250
5251 return false;
5252}
5253bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
5254 Register OpLHS, Register OpRHS,
5255 BuildFnTy &MatchInfo) const {
5256 LLT OpRHSTy = MRI.getType(OpRHS);
5257 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5258
5259 if (OpLHSDef->getOpcode() != Opc)
5260 return false;
5261
5262 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5263 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5264 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5265
5266 // If the inner op is (X op C), pull the constant out so it can be folded with
5267 // other constants in the expression tree. Folding is not guaranteed so we
5268 // might have (C1 op C2). In that case do not pull a constant out because it
5269 // won't help and can lead to infinite loops.
5270 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5271 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5272 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5273 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5274 MatchInfo = [=](MachineIRBuilder &B) {
5275 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5276 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5277 };
5278 return true;
5279 }
5280 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5281 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5282 // iff (op x, c1) has one use
5283 MatchInfo = [=](MachineIRBuilder &B) {
5284 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5285 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5286 };
5287 return true;
5288 }
5289 }
5290
5291 return false;
5292}
5293
5294bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
5295 BuildFnTy &MatchInfo) const {
5296 // We don't check if the reassociation will break a legal addressing mode
5297 // here since pointer arithmetic is handled by G_PTR_ADD.
5298 unsigned Opc = MI.getOpcode();
5299 Register DstReg = MI.getOperand(0).getReg();
5300 Register LHSReg = MI.getOperand(1).getReg();
5301 Register RHSReg = MI.getOperand(2).getReg();
5302
5303 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5304 return true;
5305 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5306 return true;
5307 return false;
5308}
5309
5310bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI,
5311 APInt &MatchInfo) const {
5312 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5313 Register SrcOp = MI.getOperand(1).getReg();
5314
5315 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5316 MatchInfo = *MaybeCst;
5317 return true;
5318 }
5319
5320 return false;
5321}
5322
5324 BuildFnTy &MatchInfo) const {
5325 Register Dst = MI.getOperand(0).getReg();
5326 auto Csts = ConstantFoldUnaryIntOp(MI.getOpcode(), MRI.getType(Dst),
5327 MI.getOperand(1).getReg(), MRI);
5328 if (Csts.empty())
5329 return false;
5330
5331 MatchInfo = [Dst, Csts = std::move(Csts)](MachineIRBuilder &B) {
5332 if (Csts.size() == 1)
5333 B.buildConstant(Dst, Csts[0]);
5334 else
5335 B.buildBuildVectorConstant(Dst, Csts);
5336 };
5337 return true;
5338}
5339
5340bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI,
5341 APInt &MatchInfo) const {
5342 Register Op1 = MI.getOperand(1).getReg();
5343 Register Op2 = MI.getOperand(2).getReg();
5344 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5345 if (!MaybeCst)
5346 return false;
5347 MatchInfo = *MaybeCst;
5348 return true;
5349}
5350
5351bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI,
5352 ConstantFP *&MatchInfo) const {
5353 Register Op1 = MI.getOperand(1).getReg();
5354 Register Op2 = MI.getOperand(2).getReg();
5355 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5356 if (!MaybeCst)
5357 return false;
5358 MatchInfo =
5359 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5360 return true;
5361}
5362
5363bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
5364 ConstantFP *&MatchInfo) const {
5365 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5366 MI.getOpcode() == TargetOpcode::G_FMAD);
5367 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5368
5369 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5370 if (!Op3Cst)
5371 return false;
5372
5373 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5374 if (!Op2Cst)
5375 return false;
5376
5377 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5378 if (!Op1Cst)
5379 return false;
5380
5381 APFloat Op1F = Op1Cst->getValueAPF();
5382 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5383 APFloat::rmNearestTiesToEven);
5384 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5385 return true;
5386}
5387
5388bool CombinerHelper::matchNarrowBinopFeedingAnd(
5389 MachineInstr &MI,
5390 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5391 // Look for a binop feeding into an AND with a mask:
5392 //
5393 // %add = G_ADD %lhs, %rhs
5394 // %and = G_AND %add, 000...11111111
5395 //
5396 // Check if it's possible to perform the binop at a narrower width and zext
5397 // back to the original width like so:
5398 //
5399 // %narrow_lhs = G_TRUNC %lhs
5400 // %narrow_rhs = G_TRUNC %rhs
5401 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5402 // %new_add = G_ZEXT %narrow_add
5403 // %and = G_AND %new_add, 000...11111111
5404 //
5405 // This can allow later combines to eliminate the G_AND if it turns out
5406 // that the mask is irrelevant.
5407 assert(MI.getOpcode() == TargetOpcode::G_AND);
5408 Register Dst = MI.getOperand(0).getReg();
5409 Register AndLHS = MI.getOperand(1).getReg();
5410 Register AndRHS = MI.getOperand(2).getReg();
5411 LLT WideTy = MRI.getType(Dst);
5412
5413 // If the potential binop has more than one use, then it's possible that one
5414 // of those uses will need its full width.
5415 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5416 return false;
5417
5418 // Check if the LHS feeding the AND is impacted by the high bits that we're
5419 // masking out.
5420 //
5421 // e.g. for 64-bit x, y:
5422 //
5423 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5424 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5425 if (!LHSInst)
5426 return false;
5427 unsigned LHSOpc = LHSInst->getOpcode();
5428 switch (LHSOpc) {
5429 default:
5430 return false;
5431 case TargetOpcode::G_ADD:
5432 case TargetOpcode::G_SUB:
5433 case TargetOpcode::G_MUL:
5434 case TargetOpcode::G_AND:
5435 case TargetOpcode::G_OR:
5436 case TargetOpcode::G_XOR:
5437 break;
5438 }
5439
5440 // Find the mask on the RHS.
5441 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5442 if (!Cst)
5443 return false;
5444 auto Mask = Cst->Value;
5445 if (!Mask.isMask())
5446 return false;
5447
5448 // No point in combining if there's nothing to truncate.
5449 unsigned NarrowWidth = Mask.countr_one();
5450 if (NarrowWidth == WideTy.getSizeInBits())
5451 return false;
5452 LLT NarrowTy = LLT::integer(NarrowWidth);
5453
5454 // Check if adding the zext + truncates could be harmful.
5455 auto &MF = *MI.getMF();
5456 const auto &TLI = getTargetLowering();
5457 LLVMContext &Ctx = MF.getFunction().getContext();
5458 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5459 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5460 return false;
5461 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5462 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5463 return false;
5464 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5465 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5466 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5467 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5468 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5469 auto NarrowBinOp =
5470 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5471 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5472 Observer.changingInstr(MI);
5473 MI.getOperand(1).setReg(Ext.getReg(0));
5474 Observer.changedInstr(MI);
5475 };
5476 return true;
5477}
5478
5479bool CombinerHelper::matchMulOBy2(MachineInstr &MI,
5480 BuildFnTy &MatchInfo) const {
5481 unsigned Opc = MI.getOpcode();
5482 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5483
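  // (G_*MULO x, 2) -> (G_*ADDO x, x): x * 2 overflows exactly when x + x
  // does, so the multiply-with-overflow can become an add-with-overflow.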
5484 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5485 return false;
5486
5487 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5488 Observer.changingInstr(MI);
5489 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5490 : TargetOpcode::G_SADDO;
5491 MI.setDesc(Builder.getTII().get(NewOpc));
5492 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5493 Observer.changedInstr(MI);
5494 };
5495 return true;
5496}
5497
5498bool CombinerHelper::matchMulOBy0(MachineInstr &MI,
5499 BuildFnTy &MatchInfo) const {
5500 // (G_*MULO x, 0) -> 0 + no carry out
5501 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5502 MI.getOpcode() == TargetOpcode::G_SMULO);
5503 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5504 return false;
5505 Register Dst = MI.getOperand(0).getReg();
5506 Register Carry = MI.getOperand(1).getReg();
5507 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5508 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5509 return false;
5510 MatchInfo = [=](MachineIRBuilder &B) {
5511 B.buildConstant(Dst, 0);
5512 B.buildConstant(Carry, 0);
5513 };
5514 return true;
5515}
5516
5517bool CombinerHelper::matchAddEToAddO(MachineInstr &MI,
5518 BuildFnTy &MatchInfo) const {
5519 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5520 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5521 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5522 MI.getOpcode() == TargetOpcode::G_SADDE ||
5523 MI.getOpcode() == TargetOpcode::G_USUBE ||
5524 MI.getOpcode() == TargetOpcode::G_SSUBE);
5525 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5526 return false;
5527 MatchInfo = [&](MachineIRBuilder &B) {
5528 unsigned NewOpcode;
5529 switch (MI.getOpcode()) {
5530 case TargetOpcode::G_UADDE:
5531 NewOpcode = TargetOpcode::G_UADDO;
5532 break;
5533 case TargetOpcode::G_SADDE:
5534 NewOpcode = TargetOpcode::G_SADDO;
5535 break;
5536 case TargetOpcode::G_USUBE:
5537 NewOpcode = TargetOpcode::G_USUBO;
5538 break;
5539 case TargetOpcode::G_SSUBE:
5540 NewOpcode = TargetOpcode::G_SSUBO;
5541 break;
5542 }
5543 Observer.changingInstr(MI);
5544 MI.setDesc(B.getTII().get(NewOpcode));
5545 MI.removeOperand(4);
5546 Observer.changedInstr(MI);
5547 };
5548 return true;
5549}
5550
5551bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
5552 BuildFnTy &MatchInfo) const {
5553 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5554 Register Dst = MI.getOperand(0).getReg();
5555 // (x + y) - z -> x (if y == z)
5556 // (x + y) - z -> y (if x == z)
5557 Register X, Y, Z;
5558 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5559 Register ReplaceReg;
5560 int64_t CstX, CstY;
5561 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5562 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5563 ReplaceReg = X;
5564 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5565 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5566 ReplaceReg = Y;
5567 if (ReplaceReg) {
5568 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5569 return true;
5570 }
5571 }
5572
5573 // x - (y + z) -> 0 - y (if x == z)
5574 // x - (y + z) -> 0 - z (if x == y)
5575 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5576 Register ReplaceReg;
5577 int64_t CstX;
5578 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5579 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5580 ReplaceReg = Y;
5581 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5582 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5583 ReplaceReg = Z;
5584 if (ReplaceReg) {
5585 MatchInfo = [=](MachineIRBuilder &B) {
5586 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5587 B.buildSub(Dst, Zero, ReplaceReg);
5588 };
5589 return true;
5590 }
5591 }
5592 return false;
5593}
5594
5595MachineInstrBuilder CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const {
5596 unsigned Opcode = MI.getOpcode();
5597 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5598 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5599 Register Dst = UDivorRem.getReg(0);
5600 Register LHS = UDivorRem.getReg(1);
5601 Register RHS = UDivorRem.getReg(2);
5602 LLT Ty = MRI.getType(Dst);
5603 LLT ScalarTy = Ty.getScalarType();
5604 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5605 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5606 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5607
5608 auto &MIB = Builder;
5609
5610 bool UseSRL = false;
5611 SmallVector<Register, 16> Shifts, Factors;
5612 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5613 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5614
5615 auto BuildExactUDIVPattern = [&](const Constant *C) {
5616 // Don't recompute inverses for each splat element.
5617 if (IsSplat && !Factors.empty()) {
5618 Shifts.push_back(Shifts[0]);
5619 Factors.push_back(Factors[0]);
5620 return true;
5621 }
5622
5623 auto *CI = cast<ConstantInt>(C);
5624 APInt Divisor = CI->getValue();
5625 unsigned Shift = Divisor.countr_zero();
5626 if (Shift) {
5627 Divisor.lshrInPlace(Shift);
5628 UseSRL = true;
5629 }
5630
5631 // Calculate the multiplicative inverse modulo BW.
5632 APInt Factor = Divisor.multiplicativeInverse();
5633 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5634 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5635 return true;
5636 };
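  // Illustrative example: for an exact udiv by 6 on i32, Shift = 1 and the
  // remaining odd divisor 3 has multiplicative inverse 0xAAAAAAAB mod 2^32
  // (3 * 0xAAAAAAAB == 1 mod 2^32), so x /u 6 == (x >> 1) * 0xAAAAAAAB.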
5637
5638 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5639 // Collect all magic values from the build vector.
5640 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5641 llvm_unreachable("Expected unary predicate match to succeed");
5642
5643 Register Shift, Factor;
5644 if (Ty.isVector()) {
5645 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5646 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5647 } else {
5648 Shift = Shifts[0];
5649 Factor = Factors[0];
5650 }
5651
5652 Register Res = LHS;
5653
5654 if (UseSRL)
5655 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5656
5657 return MIB.buildMul(Ty, Res, Factor);
5658 }
5659
5660 unsigned KnownLeadingZeros =
5661 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5662
5663 bool UseNPQ = false;
5664 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5665 auto BuildUDIVPattern = [&](const Constant *C) {
5666 auto *CI = cast<ConstantInt>(C);
5667 const APInt &Divisor = CI->getValue();
5668
5669 bool SelNPQ = false;
5670 APInt Magic(Divisor.getBitWidth(), 0);
5671 unsigned PreShift = 0, PostShift = 0;
5672
5673 // Magic algorithm doesn't work for division by 1. We need to emit a select
5674 // at the end.
5675 // TODO: Use undef values for divisor of 1.
5676 if (!Divisor.isOne()) {
5677
5678 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5679 // in the dividend exceeds the leading zeros for the divisor.
5680 UnsignedDivisionByConstantInfo magics =
5681 UnsignedDivisionByConstantInfo::get(
5682 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5683
5684 Magic = std::move(magics.Magic);
5685
5686 assert(magics.PreShift < Divisor.getBitWidth() &&
5687 "We shouldn't generate an undefined shift!");
5688 assert(magics.PostShift < Divisor.getBitWidth() &&
5689 "We shouldn't generate an undefined shift!");
5690 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5691 PreShift = magics.PreShift;
5692 PostShift = magics.PostShift;
5693 SelNPQ = magics.IsAdd;
5694 }
5695
5696 PreShifts.push_back(
5697 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5698 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5699 NPQFactors.push_back(
5700 MIB.buildConstant(ScalarTy,
5701 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5702 : APInt::getZero(EltBits))
5703 .getReg(0));
5704 PostShifts.push_back(
5705 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5706 UseNPQ |= SelNPQ;
5707 return true;
5708 };
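  // Illustrative example: for udiv by 7 on i32, UnsignedDivisionByConstantInfo
  // typically yields Magic = 0x24924925 with IsAdd set and PostShift = 2, so
  // the emitted sequence is q = umulh(x, Magic); npq = (x - q) >> 1;
  // result = (npq + q) >> 2.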
5709
5710 // Collect the shifts/magic values from each element.
5711 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5712 (void)Matched;
5713 assert(Matched && "Expected unary predicate match to succeed");
5714
5715 Register PreShift, PostShift, MagicFactor, NPQFactor;
5716 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5717 if (RHSDef) {
5718 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5719 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5720 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5721 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5722 } else {
5723 assert(MRI.getType(RHS).isScalar() &&
5724 "Non-build_vector operation should have been a scalar");
5725 PreShift = PreShifts[0];
5726 MagicFactor = MagicFactors[0];
5727 PostShift = PostShifts[0];
5728 }
5729
5730 Register Q = LHS;
5731 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5732
5733 // Multiply the numerator (operand 0) by the magic value.
5734 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5735
5736 if (UseNPQ) {
5737 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5738
5739 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5740 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5741 if (Ty.isVector())
5742 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5743 else
5744 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5745
5746 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5747 }
5748
5749 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5750 auto One = MIB.buildConstant(Ty, 1);
5751 auto IsOne = MIB.buildICmp(
5752 CmpInst::Predicate::ICMP_EQ,
5753 Ty.isScalar() ? LLT::integer(1) : Ty.changeElementType(LLT::integer(1)),
5754 RHS, One);
5755 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5756
5757 if (Opcode == TargetOpcode::G_UREM) {
5758 auto Prod = MIB.buildMul(Ty, ret, RHS);
5759 return MIB.buildSub(Ty, LHS, Prod);
5760 }
5761 return ret;
5762}
5763
5764bool CombinerHelper::matchUDivOrURemByConst(MachineInstr &MI) const {
5765 unsigned Opcode = MI.getOpcode();
5766 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5767 Register Dst = MI.getOperand(0).getReg();
5768 Register RHS = MI.getOperand(2).getReg();
5769 LLT DstTy = MRI.getType(Dst);
5770
5771 auto &MF = *MI.getMF();
5772 AttributeList Attr = MF.getFunction().getAttributes();
5773 const auto &TLI = getTargetLowering();
5774 LLVMContext &Ctx = MF.getFunction().getContext();
5775 if (DstTy.getScalarSizeInBits() == 1 ||
5776 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5777 return false;
5778
5779 // Don't do this for minsize because the instruction sequence is usually
5780 // larger.
5781 if (MF.getFunction().hasMinSize())
5782 return false;
5783
5784 if (Opcode == TargetOpcode::G_UDIV &&
5785 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5786 return matchUnaryPredicate(
5787 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5788 }
5789
5790 auto *RHSDef = MRI.getVRegDef(RHS);
5791 if (!isConstantOrConstantVector(*RHSDef, MRI))
5792 return false;
5793
5794 // Don't do this if the types are not going to be legal.
5795 if (LI) {
5796 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5797 return false;
5798 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5799 return false;
5800 if (!isLegalOrBeforeLegalizer(
5801 {TargetOpcode::G_ICMP,
5802 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5803 DstTy}}))
5804 return false;
5805 if (Opcode == TargetOpcode::G_UREM &&
5806 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5807 return false;
5808 }
5809
5810 return matchUnaryPredicate(
5811 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5812}
5813
5814void CombinerHelper::applyUDivOrURemByConst(MachineInstr &MI) const {
5815 auto *NewMI = buildUDivOrURemUsingMul(MI);
5816 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5817}
5818
5819bool CombinerHelper::matchSDivOrSRemByConst(MachineInstr &MI) const {
5820 unsigned Opcode = MI.getOpcode();
5821 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5822 Register Dst = MI.getOperand(0).getReg();
5823 Register RHS = MI.getOperand(2).getReg();
5824 LLT DstTy = MRI.getType(Dst);
5825 auto SizeInBits = DstTy.getScalarSizeInBits();
5826 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5827
5828 auto &MF = *MI.getMF();
5829 AttributeList Attr = MF.getFunction().getAttributes();
5830 const auto &TLI = getTargetLowering();
5831 LLVMContext &Ctx = MF.getFunction().getContext();
5832 if (DstTy.getScalarSizeInBits() < 3 ||
5833 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5834 return false;
5835
5836 // Don't do this for minsize because the instruction sequence is usually
5837 // larger.
5838 if (MF.getFunction().hasMinSize())
5839 return false;
5840
5841 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5842 if (Opcode == TargetOpcode::G_SDIV &&
5843 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5844 return matchUnaryPredicate(
5845 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5846 }
5847
5848 auto *RHSDef = MRI.getVRegDef(RHS);
5849 if (!isConstantOrConstantVector(*RHSDef, MRI))
5850 return false;
5851
5852 // Don't do this if the types are not going to be legal.
5853 if (LI) {
5854 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5855 return false;
5856 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5857 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5858 return false;
5859 if (Opcode == TargetOpcode::G_SREM &&
5860 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5861 return false;
5862 }
5863
5864 return matchUnaryPredicate(
5865 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5866}
5867
5868void CombinerHelper::applySDivOrSRemByConst(MachineInstr &MI) const {
5869 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5870 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5871}
5872
5873MachineInstrBuilder CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const {
5874 unsigned Opcode = MI.getOpcode();
5875 assert(Opcode == TargetOpcode::G_SDIV ||
5876 Opcode == TargetOpcode::G_SREM);
5877 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5878 Register Dst = SDivorRem.getReg(0);
5879 Register LHS = SDivorRem.getReg(1);
5880 Register RHS = SDivorRem.getReg(2);
5881 LLT Ty = MRI.getType(Dst);
5882 LLT ScalarTy = Ty.getScalarType();
5883 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5884 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5885 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5886 auto &MIB = Builder;
5887
5888 bool UseSRA = false;
5889 SmallVector<Register, 16> ExactShifts, ExactFactors;
5890
5891 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5892 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5893
5894 auto BuildExactSDIVPattern = [&](const Constant *C) {
5895 // Don't recompute inverses for each splat element.
5896 if (IsSplat && !ExactFactors.empty()) {
5897 ExactShifts.push_back(ExactShifts[0]);
5898 ExactFactors.push_back(ExactFactors[0]);
5899 return true;
5900 }
5901
5902 auto *CI = cast<ConstantInt>(C);
5903 APInt Divisor = CI->getValue();
5904 unsigned Shift = Divisor.countr_zero();
5905 if (Shift) {
5906 Divisor.ashrInPlace(Shift);
5907 UseSRA = true;
5908 }
5909
5910 // Calculate the multiplicative inverse modulo BW.
5911 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5912 APInt Factor = Divisor.multiplicativeInverse();
5913 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5914 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5915 return true;
5916 };
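  // Illustrative example: for an exact sdiv by -8 on i32, Shift = 3 and the
  // arithmetically shifted divisor is -1, whose inverse is -1, so
  // x /s -8 == (x >>s 3) * -1 when the division is known exact.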
5917
5918 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5919 // Collect all magic values from the build vector.
5920 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5921 (void)Matched;
5922 assert(Matched && "Expected unary predicate match to succeed");
5923
5924 Register Shift, Factor;
5925 if (Ty.isVector()) {
5926 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5927 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5928 } else {
5929 Shift = ExactShifts[0];
5930 Factor = ExactFactors[0];
5931 }
5932
5933 Register Res = LHS;
5934
5935 if (UseSRA)
5936 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5937
5938 return MIB.buildMul(Ty, Res, Factor);
5939 }
5940
5941 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5942
5943 auto BuildSDIVPattern = [&](const Constant *C) {
5944 auto *CI = cast<ConstantInt>(C);
5945 const APInt &Divisor = CI->getValue();
5946
5947 SignedDivisionByConstantInfo Magics =
5948 SignedDivisionByConstantInfo::get(Divisor);
5949 int NumeratorFactor = 0;
5950 int ShiftMask = -1;
5951
5952 if (Divisor.isOne() || Divisor.isAllOnes()) {
5953 // If d is +1/-1, we just multiply the numerator by +1/-1.
5954 NumeratorFactor = Divisor.getSExtValue();
5955 Magics.Magic = 0;
5956 Magics.ShiftAmount = 0;
5957 ShiftMask = 0;
5958 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5959 // If d > 0 and m < 0, add the numerator.
5960 NumeratorFactor = 1;
5961 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5962 // If d < 0 and m > 0, subtract the numerator.
5963 NumeratorFactor = -1;
5964 }
5965
5966 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5967 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5968 Shifts.push_back(
5969 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5970 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5971
5972 return true;
5973 };
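  // Illustrative example: for sdiv by 7 on i32, the signed magic value is
  // typically 0x92492493 with ShiftAmount = 2; since d > 0 and m < 0 the
  // numerator is added: q = smulh(x, m) + x; q >>= 2; q += (q >>u 31).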
5974
5975 // Collect the shifts/magic values from each element.
5976 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5977 (void)Matched;
5978 assert(Matched && "Expected unary predicate match to succeed");
5979
5980 Register MagicFactor, Factor, Shift, ShiftMask;
5981 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5982 if (RHSDef) {
5983 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5984 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5985 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5986 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5987 } else {
5988 assert(MRI.getType(RHS).isScalar() &&
5989 "Non-build_vector operation should have been a scalar");
5990 MagicFactor = MagicFactors[0];
5991 Factor = Factors[0];
5992 Shift = Shifts[0];
5993 ShiftMask = ShiftMasks[0];
5994 }
5995
5996 Register Q = LHS;
5997 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5998
5999 // (Optionally) Add/subtract the numerator using Factor.
6000 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
6001 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
6002
6003 // Shift right algebraic by shift value.
6004 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
6005
6006 // Extract the sign bit, mask it and add it to the quotient.
6007 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
6008 auto T = MIB.buildLShr(Ty, Q, SignShift);
6009 T = MIB.buildAnd(Ty, T, ShiftMask);
6010 auto ret = MIB.buildAdd(Ty, Q, T);
6011
6012 if (Opcode == TargetOpcode::G_SREM) {
6013 auto Prod = MIB.buildMul(Ty, ret, RHS);
6014 return MIB.buildSub(Ty, LHS, Prod);
6015 }
6016 return ret;
6017}
6018
6019bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const {
6020 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
6021 MI.getOpcode() == TargetOpcode::G_UDIV) &&
6022 "Expected SDIV or UDIV");
6023 auto &Div = cast<GenericMachineInstr>(MI);
6024 Register RHS = Div.getReg(2);
6025 auto MatchPow2 = [&](const Constant *C) {
6026 auto *CI = dyn_cast<ConstantInt>(C);
6027 return CI && (CI->getValue().isPowerOf2() ||
6028 (IsSigned && CI->getValue().isNegatedPowerOf2()));
6029 };
6030 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
6031}
6032
6033void CombinerHelper::applySDivByPow2(MachineInstr &MI) const {
6034 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
6035 auto &SDiv = cast<GenericMachineInstr>(MI);
6036 Register Dst = SDiv.getReg(0);
6037 Register LHS = SDiv.getReg(1);
6038 Register RHS = SDiv.getReg(2);
6039 LLT Ty = MRI.getType(Dst);
6040 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6041 LLT CCVT = Ty.isVector() ? LLT::vector(Ty.getElementCount(), LLT::integer(1))
6042 : LLT::integer(1);
6043
6044 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
6045 // to the following version:
6046 //
6047 // %c1 = G_CTTZ %rhs
6048 // %inexact = G_SUB $bitwidth, %c1
6049 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6050 // %lshr = G_LSHR %sign, %inexact
6051 // %add = G_ADD %lhs, %lshr
6052 // %ashr = G_ASHR %add, %c1
6053 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
6054 // %zero = G_CONSTANT $0
6055 // %neg = G_NEG %ashr
6056 // %isneg = G_ICMP SLT %rhs, %zero
6057 // %res = G_SELECT %isneg, %neg, %ashr
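  //
  // Illustrative example: sdiv x, 8 on i32 gives %c1 = 3, %inexact = 29,
  // %sign = x >>s 31, %lshr = %sign >>u 29 (7 for negative x, 0 otherwise),
  // so %ashr = (x + 7) >>s 3 for negative x, rounding the quotient toward
  // zero.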
6058
6059 unsigned BitWidth = Ty.getScalarSizeInBits();
6060 auto Zero = Builder.buildConstant(Ty, 0);
6061
6062 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
6063 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6064 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
6065 // Splat the sign bit into the register
6066 auto Sign = Builder.buildAShr(
6067 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
6068
6069 // Add (LHS < 0) ? abs2 - 1 : 0;
6070 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
6071 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
6072 auto AShr = Builder.buildAShr(Ty, Add, C1);
6073
6074 // Special case: (sdiv X, 1) -> X
6075 // Special Case: (sdiv X, -1) -> 0-X
6076 auto One = Builder.buildConstant(Ty, 1);
6077 auto MinusOne = Builder.buildConstant(Ty, -1);
6078 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
6079 auto IsMinusOne =
6080 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
6081 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
6082 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
6083
6084 // If divided by a positive value, we're done. Otherwise, the result must be
6085 // negated.
6086 auto Neg = Builder.buildNeg(Ty, AShr);
6087 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
6088 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
6089 MI.eraseFromParent();
6090}
6091
6092void CombinerHelper::applyUDivByPow2(MachineInstr &MI) const {
6093 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
6094 auto &UDiv = cast<GenericMachineInstr>(MI);
6095 Register Dst = UDiv.getReg(0);
6096 Register LHS = UDiv.getReg(1);
6097 Register RHS = UDiv.getReg(2);
6098 LLT Ty = MRI.getType(Dst);
6099 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6100
6101 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6102 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
6103 MI.eraseFromParent();
6104}
6105
6107 assert(MI.getOpcode() == TargetOpcode::G_SREM && "Expected SREM");
6108 auto &SRem = cast<GBinOp>(MI);
6109 Register Dst = SRem.getReg(0);
6110 Register LHS = SRem.getLHSReg();
6111 Register RHS = SRem.getRHSReg();
6112 LLT Ty = MRI.getType(Dst);
6114
6115 // Effectively we want to lower G_SREM %lhs, %rhs, where %rhs is +/- a power
6116 // of 2, to the following branch-free bias-and-mask version:
6117 //
6118 // %abs = G_ABS %rhs
6119 // %mask = G_SUB %abs, 1
6120 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6121 // %bias = G_AND %sign, %mask
6122 // %biased = G_ADD %lhs, %bias
6123 // %masked = G_AND %biased, %mask
6124 // %res = G_SUB %masked, %bias
6125 //
6126 // The bias adds (|%rhs| - 1) for negative %lhs, correcting rounding towards
6127 // zero (instead of towards -inf that a plain mask would give). Constant
6128 // divisors collapse %mask to a single G_CONSTANT via the CSEMIRBuilder folds
6129 // for G_ABS and G_SUB.
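  //
  // Illustrative example: srem -13, 8 gives %mask = 7, %sign = -1, %bias = 7,
  // %biased = -6, %masked = 2, %res = 2 - 7 = -5, matching -13 == 8 * (-1) - 5.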
6130
6131 unsigned BitWidth = Ty.getScalarSizeInBits();
6132 auto AbsRHS = Builder.buildAbs(Ty, RHS);
6133 auto Mask = Builder.buildSub(Ty, AbsRHS, Builder.buildConstant(Ty, 1));
6134 auto BWMinusOne = Builder.buildConstant(ShiftAmtTy, BitWidth - 1);
6135 auto Sign = Builder.buildAShr(Ty, LHS, BWMinusOne);
6136 auto Bias = Builder.buildAnd(Ty, Sign, Mask);
6137 auto Biased = Builder.buildAdd(Ty, LHS, Bias);
6138 auto Masked = Builder.buildAnd(Ty, Biased, Mask);
6139 Builder.buildSub(Dst, Masked, Bias);
6140 MI.eraseFromParent();
6141}
6142
6143bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const {
6144 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
6145 Register RHS = MI.getOperand(2).getReg();
6146 Register Dst = MI.getOperand(0).getReg();
6147 LLT Ty = MRI.getType(Dst);
6148 LLT RHSTy = MRI.getType(RHS);
6149 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6150 auto MatchPow2ExceptOne = [&](const Constant *C) {
6151 if (auto *CI = dyn_cast<ConstantInt>(C))
6152 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6153 return false;
6154 };
6155 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6156 return false;
6157 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6158 // get the log base 2, and G_CTLZ is not always legal on a target.
6159 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6160 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6161}
6162
6163void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
6164 Register LHS = MI.getOperand(1).getReg();
6165 Register RHS = MI.getOperand(2).getReg();
6166 Register Dst = MI.getOperand(0).getReg();
6167 LLT Ty = MRI.getType(Dst);
6168 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6169 unsigned NumEltBits = Ty.getScalarSizeInBits();
6170
6171 auto LogBase2 = buildLogBase2(RHS, Builder);
6172 auto ShiftAmt =
6173 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
6174 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6175 Builder.buildLShr(Dst, LHS, Trunc);
6176 MI.eraseFromParent();
6177}
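// Illustrative example: for i32, umulh(x, 8) is the high half of x * 2^3,
// which is x >> (32 - 3) = x >> 29, as built above via the G_CTLZ-based log2.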
6178
6180 Register &MatchInfo) const {
6181 Register Dst = MI.getOperand(0).getReg();
6182 Register Src = MI.getOperand(1).getReg();
6183 LLT DstTy = MRI.getType(Dst);
6184 LLT SrcTy = MRI.getType(Src);
6185 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6186 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6187 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6188
6189 if (!isLegalOrBeforeLegalizer(
6190 {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6191 return false;
6192
6193 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6194 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
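  // Illustrative example: truncating i32 to i8 with signed saturation matches
  // smin(smax(x, -128), 127) or smax(smin(x, 127), -128), in either order.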
6195 return mi_match(Src, MRI,
6196 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6197 m_SpecificICstOrSplat(SignedMin)),
6198 m_SpecificICstOrSplat(SignedMax))) ||
6199 mi_match(Src, MRI,
6200 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6201 m_SpecificICstOrSplat(SignedMax)),
6202 m_SpecificICstOrSplat(SignedMin)));
6203}
6204
6206 Register &MatchInfo) const {
6207 Register Dst = MI.getOperand(0).getReg();
6208 Builder.buildTruncSSatS(Dst, MatchInfo);
6209 MI.eraseFromParent();
6210}
6211
6213 Register &MatchInfo) const {
6214 Register Dst = MI.getOperand(0).getReg();
6215 Register Src = MI.getOperand(1).getReg();
6216 LLT DstTy = MRI.getType(Dst);
6217 LLT SrcTy = MRI.getType(Src);
6218 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6219 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6220 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6221
6222 if (!isLegalOrBeforeLegalizer(
6223 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6224 return false;
6225 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6226 return mi_match(Src, MRI,
6227 m_GUMin(m_Reg(MatchInfo),
6228 m_SpecificICstOrSplat(UnsignedMax))) ||
6229 mi_match(Src, MRI,
6230 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6231 m_SpecificICstOrSplat(UnsignedMax)),
6232 m_SpecificICstOrSplat(0))) ||
6233 mi_match(Src, MRI,
6234 m_GUMin(m_GSMax(m_Reg(MatchInfo), m_SpecificICstOrSplat(0)),
6235 m_SpecificICstOrSplat(UnsignedMax)));
6236}
6237
6239 Register &MatchInfo) const {
6240 Register Dst = MI.getOperand(0).getReg();
6241 Builder.buildTruncSSatU(Dst, MatchInfo);
6242 MI.eraseFromParent();
6243}
6244
6246 MachineInstr &MinMI) const {
6247 Register Min = MinMI.getOperand(2).getReg();
6248 Register Val = MinMI.getOperand(1).getReg();
6249 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6250 LLT SrcTy = MRI.getType(Val);
6251 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6252 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6253 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6254
6255 if (!isLegalOrBeforeLegalizer(
6256 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6257 return false;
6258 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6259 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6260 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6261}
6262
6264 MachineInstr &SrcMI) const {
6265 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6266 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6267
6268 return LI &&
6269 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6270}
6271
6272bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
6273 BuildFnTy &MatchInfo) const {
6274 unsigned Opc = MI.getOpcode();
6275 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6276 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6277 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6278
6279 Register Dst = MI.getOperand(0).getReg();
6280 Register X = MI.getOperand(1).getReg();
6281 Register Y = MI.getOperand(2).getReg();
6282 LLT Type = MRI.getType(Dst);
6283
6284 // fold (fadd x, fneg(y)) -> (fsub x, y)
6285 // fold (fadd fneg(y), x) -> (fsub x, y)
6286 // G_ADD is commutative so both cases are checked by m_GFAdd
6287 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6288 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6289 Opc = TargetOpcode::G_FSUB;
6290 }
6291 // fold (fsub x, fneg(y)) -> (fadd x, y)
6292 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6293 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6294 Opc = TargetOpcode::G_FADD;
6295 }
6296 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6297 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6298 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6299 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6300 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6301 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6302 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6303 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6304 // no opcode change
6305 } else
6306 return false;
6307
6308 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6309 Observer.changingInstr(MI);
6310 MI.setDesc(B.getTII().get(Opc));
6311 MI.getOperand(1).setReg(X);
6312 MI.getOperand(2).setReg(Y);
6313 Observer.changedInstr(MI);
6314 };
6315 return true;
6316}
6317
6318bool CombinerHelper::matchFsubToFneg(MachineInstr &MI,
6319 Register &MatchInfo) const {
6320 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6321
6322 Register LHS = MI.getOperand(1).getReg();
6323 MatchInfo = MI.getOperand(2).getReg();
6324 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6325
6326 const auto LHSCst = Ty.isVector()
6327 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6328 : getFConstantVRegValWithLookThrough(LHS, MRI);
6329 if (!LHSCst)
6330 return false;
6331
6332 // -0.0 is always allowed
6333 if (LHSCst->Value.isNegZero())
6334 return true;
6335
6336 // +0.0 is only allowed if nsz is set.
6337 if (LHSCst->Value.isPosZero())
6338 return MI.getFlag(MachineInstr::FmNsz);
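  // (Without nsz this would be unsound for x == +0.0: fsub +0.0, +0.0 is
  // +0.0, while fneg +0.0 is -0.0.)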
6339
6340 return false;
6341}
6342
6343void CombinerHelper::applyFsubToFneg(MachineInstr &MI,
6344 Register &MatchInfo) const {
6345 Register Dst = MI.getOperand(0).getReg();
6346 Builder.buildFNeg(
6347 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6348 eraseInst(MI);
6349}
6350
6351/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6352/// due to global flags or MachineInstr flags.
6353static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6354 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6355 return false;
6356 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6357}
6358
6359static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6360 const MachineRegisterInfo &MRI) {
6361 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6362 MRI.use_instr_nodbg_end()) >
6363 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6364 MRI.use_instr_nodbg_end());
6365}
6366
6367bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
6368 bool &AllowFusionGlobally,
6369 bool &HasFMAD, bool &Aggressive,
6370 bool CanReassociate) const {
6371
6372 auto *MF = MI.getMF();
6373 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6374 const TargetOptions &Options = MF->getTarget().Options;
6375 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6376
6377 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6378 return false;
6379
6380 // Floating-point multiply-add with intermediate rounding.
6381 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6382 // Floating-point multiply-add without intermediate rounding.
6383 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6384 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6385 // No valid opcode, do not combine.
6386 if (!HasFMAD && !HasFMA)
6387 return false;
6388
6389 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6390 // If the addition is not contractable, do not combine.
6391 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6392 return false;
6393
6394 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6395 return true;
6396}
6397
6398bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
6399 MachineInstr &MI,
6400 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6401 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6402
6403 bool AllowFusionGlobally, HasFMAD, Aggressive;
6404 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6405 return false;
6406
6407 Register Op1 = MI.getOperand(1).getReg();
6408 Register Op2 = MI.getOperand(2).getReg();
6409 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6410 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6411 unsigned PreferredFusedOpcode =
6412 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6413
6414 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6415 // prefer to fold the multiply with fewer uses.
6416 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6417 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6418 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6419 std::swap(LHS, RHS);
6420 }
6421
6422 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6423 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6424 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6425 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6426 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6427 {LHS.MI->getOperand(1).getReg(),
6428 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6429 };
6430 return true;
6431 }
6432
6433 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6434 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6435 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6436 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6437 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6438 {RHS.MI->getOperand(1).getReg(),
6439 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6440 };
6441 return true;
6442 }
6443
6444 return false;
6445}
6446
6447bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
6448 MachineInstr &MI,
6449 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6450 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6451
6452 bool AllowFusionGlobally, HasFMAD, Aggressive;
6453 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6454 return false;
6455
6456 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6457 Register Op1 = MI.getOperand(1).getReg();
6458 Register Op2 = MI.getOperand(2).getReg();
6459 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6460 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6461 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6462
6463 unsigned PreferredFusedOpcode =
6464 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6465
6466 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6467 // prefer to fold the multiply with fewer uses.
6468 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6469 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6470 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6471 std::swap(LHS, RHS);
6472 }
6473
6474 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6475 MachineInstr *FpExtSrc;
6476 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6477 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6478 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6479 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6480 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6481 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6482 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6483 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6484 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6485 };
6486 return true;
6487 }
6488
6489 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6490 // Note: Commutes FADD operands.
6491 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6492 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6493 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6494 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6495 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6496 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6497 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6498 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6499 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6500 };
6501 return true;
6502 }
6503
6504 return false;
6505}
6506
6507bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
6508 MachineInstr &MI,
6509 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6510 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6511
6512 bool AllowFusionGlobally, HasFMAD, Aggressive;
6513 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6514 return false;
6515
6516 Register Op1 = MI.getOperand(1).getReg();
6517 Register Op2 = MI.getOperand(2).getReg();
6518 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6519 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6520 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6521
6522 unsigned PreferredFusedOpcode =
6523 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6524
6525 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6526 // prefer to fold the multiply with fewer uses.
6527 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6528 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6529 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6530 std::swap(LHS, RHS);
6531 }
6532
6533 MachineInstr *FMA = nullptr;
6534 Register Z;
6535 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6536 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6537 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6538 TargetOpcode::G_FMUL) &&
6539 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6540 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6541 FMA = LHS.MI;
6542 Z = RHS.Reg;
6543 }
6544 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6545 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6546 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6547 TargetOpcode::G_FMUL) &&
6548 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6549 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6550 Z = LHS.Reg;
6551 FMA = RHS.MI;
6552 }
6553
6554 if (FMA) {
6555 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6556 Register X = FMA->getOperand(1).getReg();
6557 Register Y = FMA->getOperand(2).getReg();
6558 Register U = FMulMI->getOperand(1).getReg();
6559 Register V = FMulMI->getOperand(2).getReg();
6560
6561 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6562 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6563 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6564 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6565 {X, Y, InnerFMA});
6566 };
6567 return true;
6568 }
6569
6570 return false;
6571}
6572
6573bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMA(
6574 MachineInstr &MI,
6575 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6576 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6577
6578 bool AllowFusionGlobally, HasFMAD, Aggressive;
6579 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6580 return false;
6581
6582 if (!Aggressive)
6583 return false;
6584
6585 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6586 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6587 Register Op1 = MI.getOperand(1).getReg();
6588 Register Op2 = MI.getOperand(2).getReg();
6589 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6590 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6591
6592 unsigned PreferredFusedOpcode =
6593 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6594
6595 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6596 // prefer to fold the multiply with fewer uses.
6597 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6598 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6599 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6600 std::swap(LHS, RHS);
6601 }
6602
6603 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6604 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6605 Register Y, MachineIRBuilder &B) {
6606 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6607 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6608 Register InnerFMA =
6609 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6610 .getReg(0);
6611 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6612 {X, Y, InnerFMA});
6613 };
6614
6615 MachineInstr *FMulMI, *FMAMI;
6616 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6617 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6618 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6619 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6620 m_GFPExt(m_MInstr(FMulMI))) &&
6621 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6622 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6623 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6624 MatchInfo = [=](MachineIRBuilder &B) {
6625 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6626 FMulMI->getOperand(2).getReg(), RHS.Reg,
6627 LHS.MI->getOperand(1).getReg(),
6628 LHS.MI->getOperand(2).getReg(), B);
6629 };
6630 return true;
6631 }
6632
6633 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6634 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6635 // FIXME: This turns two single-precision and one double-precision
6636 // operation into two double-precision operations, which might not be
6637 // interesting for all targets, especially GPUs.
6638 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6639 FMAMI->getOpcode() == PreferredFusedOpcode) {
6640 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6641 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6642 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6643 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6644 MatchInfo = [=](MachineIRBuilder &B) {
6645 Register X = FMAMI->getOperand(1).getReg();
6646 Register Y = FMAMI->getOperand(2).getReg();
6647 X = B.buildFPExt(DstType, X).getReg(0);
6648 Y = B.buildFPExt(DstType, Y).getReg(0);
6649 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6650 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6651 };
6652
6653 return true;
6654 }
6655 }
6656
6657 // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
6658 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6659 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6660 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6661 m_GFPExt(m_MInstr(FMulMI))) &&
6662 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6663 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6664 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6665 MatchInfo = [=](MachineIRBuilder &B) {
6666 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6667 FMulMI->getOperand(2).getReg(), LHS.Reg,
6668 RHS.MI->getOperand(1).getReg(),
6669 RHS.MI->getOperand(2).getReg(), B);
6670 };
6671 return true;
6672 }
6673
6674 // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
6675 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6676 // FIXME: This turns two single-precision and one double-precision
6677 // operation into two double-precision operations, which might not be
6678 // interesting for all targets, especially GPUs.
6679 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6680 FMAMI->getOpcode() == PreferredFusedOpcode) {
6681 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6682 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6683 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6684 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6685 MatchInfo = [=](MachineIRBuilder &B) {
6686 Register X = FMAMI->getOperand(1).getReg();
6687 Register Y = FMAMI->getOperand(2).getReg();
6688 X = B.buildFPExt(DstType, X).getReg(0);
6689 Y = B.buildFPExt(DstType, Y).getReg(0);
6690 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6691 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6692 };
6693 return true;
6694 }
6695 }
6696
6697 return false;
6698}
6699
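// Editorial note (illustrative): the G_FSUB combines below reuse the fused
// multiply-add machinery by negating one operand, e.g. with contraction
// allowed, (fsub (fmul x, y), z) becomes a single G_FMA x, y, (G_FNEG z)
// rather than a multiply followed by a subtract.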
6700bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
6701 MachineInstr &MI,
6702 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6703 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6704
6705 bool AllowFusionGlobally, HasFMAD, Aggressive;
6706 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6707 return false;
6708
6709 Register Op1 = MI.getOperand(1).getReg();
6710 Register Op2 = MI.getOperand(2).getReg();
6711 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6712 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6713 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6714
6715 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
6716 // prefer to fold the multiply with fewer uses.
6717 bool FirstMulHasFewerUses = true;
6718 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6719 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6720 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6721 FirstMulHasFewerUses = false;
6722
6723 unsigned PreferredFusedOpcode =
6724 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6725
6726 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6727 if (FirstMulHasFewerUses &&
6728 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6729 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6730 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6731 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6732 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6733 {LHS.MI->getOperand(1).getReg(),
6734 LHS.MI->getOperand(2).getReg(), NegZ});
6735 };
6736 return true;
6737 }
6738 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6739 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6740 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6741 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6742 Register NegY =
6743 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6744 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6745 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6746 };
6747 return true;
6748 }
6749
6750 return false;
6751}
6752
6753bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
6754 MachineInstr &MI,
6755 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6756 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6757
6758 bool AllowFusionGlobally, HasFMAD, Aggressive;
6759 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6760 return false;
6761
6762 Register LHSReg = MI.getOperand(1).getReg();
6763 Register RHSReg = MI.getOperand(2).getReg();
6764 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6765
6766 unsigned PreferredFusedOpcode =
6767 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6768
6769 MachineInstr *FMulMI;
6770 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6771 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6772 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6773 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6774 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6775 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6776 Register NegX =
6777 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6778 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6779 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6780 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6781 };
6782 return true;
6783 }
6784
6785 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
6786 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6787 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6788 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6789 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6790 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6791 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6792 {FMulMI->getOperand(1).getReg(),
6793 FMulMI->getOperand(2).getReg(), LHSReg});
6794 };
6795 return true;
6796 }
6797
6798 return false;
6799}
6800
6801bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
6802 MachineInstr &MI,
6803 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6804 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6805
6806 bool AllowFusionGlobally, HasFMAD, Aggressive;
6807 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6808 return false;
6809
6810 Register LHSReg = MI.getOperand(1).getReg();
6811 Register RHSReg = MI.getOperand(2).getReg();
6812 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6813
6814 unsigned PreferredFusedOpcode =
6815 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6816
6817 MachineInstr *FMulMI;
6818 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6819 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6820 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6821 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6822 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6823 Register FpExtX =
6824 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6825 Register FpExtY =
6826 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6827 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6828 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6829 {FpExtX, FpExtY, NegZ});
6830 };
6831 return true;
6832 }
6833
6834 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6835 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6836 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6837 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6838 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6839 Register FpExtY =
6840 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6841 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6842 Register FpExtZ =
6843 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6844 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6845 {NegY, FpExtZ, LHSReg});
6846 };
6847 return true;
6848 }
6849
6850 return false;
6851}
6852
6853bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6854 MachineInstr &MI,
6855 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6856 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6857
6858 bool AllowFusionGlobally, HasFMAD, Aggressive;
6859 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6860 return false;
6861
6862 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6863 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6864 Register LHSReg = MI.getOperand(1).getReg();
6865 Register RHSReg = MI.getOperand(2).getReg();
6866
6867 unsigned PreferredFusedOpcode =
6868 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6869
6870 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6871 MachineIRBuilder &B) {
6872 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6873 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6874 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6875 };
6876
6877 MachineInstr *FMulMI;
6878 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6879 // (fneg (fma (fpext x), (fpext y), z))
6880 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6881 // (fneg (fma (fpext x), (fpext y), z))
6882 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6883 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6884 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6885 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6886 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6887 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6888 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6889 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6890 FMulMI->getOperand(2).getReg(), RHSReg, B);
6891 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6892 };
6893 return true;
6894 }
6895
6896 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6897 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6898 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6899 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6900 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6901 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6902 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6903 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6904 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6905 FMulMI->getOperand(2).getReg(), LHSReg, B);
6906 };
6907 return true;
6908 }
6909
6910 return false;
6911}
6912
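// Editorial note (illustrative): for the NaN-propagating G_FMINIMUM and
// G_FMAXIMUM a constant NaN operand forces a NaN result, so the match below
// forwards the NaN operand; for G_FMINNUM and G_FMAXNUM a NaN operand is
// ignored and the other operand is forwarded instead.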
6913bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6914 unsigned &IdxToPropagate) const {
6915 bool PropagateNaN;
6916 switch (MI.getOpcode()) {
6917 default:
6918 return false;
6919 case TargetOpcode::G_FMINNUM:
6920 case TargetOpcode::G_FMAXNUM:
6921 PropagateNaN = false;
6922 break;
6923 case TargetOpcode::G_FMINIMUM:
6924 case TargetOpcode::G_FMAXIMUM:
6925 PropagateNaN = true;
6926 break;
6927 }
6928
6929 auto MatchNaN = [&](unsigned Idx) {
6930 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6931 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6932 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6933 return false;
6934 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6935 return true;
6936 };
6937
6938 return MatchNaN(1) || MatchNaN(2);
6939}
6940
6941// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6942// reciprocal.
6943// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
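// An illustrative MIR shape (editorial sketch), with both divides carrying
// the 'arcp' fast-math flag and living in the same block:
//   %q0:_(s32) = arcp G_FDIV %a, %y
//   %q1:_(s32) = arcp G_FDIV %b, %y
// becomes
//   %one:_(s32) = G_FCONSTANT float 1.0
//   %r:_(s32)   = arcp G_FDIV %one, %y
//   %q0:_(s32)  = arcp G_FMUL %a, %r
//   %q1:_(s32)  = arcp G_FMUL %b, %r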
6944bool CombinerHelper::matchRepeatedFPDivisor(
6945 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6946 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6947
6948 Register X = MI.getOperand(1).getReg();
6949 Register Y = MI.getOperand(2).getReg();
6950
6951 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
6952 return false;
6953
6954 auto IsOne = [this](Register X) {
6955 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6956 return N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0));
6957 };
6958
6959 // Skip if current node is a reciprocal/fneg-reciprocal.
6960 if (IsOne(X))
6961 return false;
6962
6963 // Exit early if the target does not want this transform or if there can't
6964 // possibly be enough uses of the divisor to make the transform worthwhile.
6965 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6966 if (!MinUses)
6967 return false;
6968
6969 // Find all FDIV users of the same divisor. For the moment we limit all
6970 // instructions to a single BB and use the first Instr in MatchInfo as the
6971 // dominating position.
6972 MatchInfo.push_back(&MI);
6973 for (auto &U : MRI.use_nodbg_instructions(Y)) {
6974 if (&U == &MI || U.getParent() != MI.getParent())
6975 continue;
6976 if (U.getOpcode() == TargetOpcode::G_FDIV &&
6977 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y &&
6978 !IsOne(U.getOperand(1).getReg())) {
6979 // This division is eligible for the optimization only if it permits
6980 // reciprocal formation, i.e. it carries the arcp fast-math flag.
6981 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6982 MatchInfo.push_back(&U);
6983 if (dominates(U, *MatchInfo[0]))
6984 std::swap(MatchInfo[0], MatchInfo.back());
6985 }
6986 }
6987 }
6988
6989 // Now that we have the actual number of divisor uses, make sure it meets
6990 // the minimum threshold specified by the target.
6991 return MatchInfo.size() >= MinUses;
6992}
6993
6994void CombinerHelper::applyRepeatedFPDivisor(
6995 SmallVector<MachineInstr *> &MatchInfo) const {
6996 // Generate the new div at the position of the first instruction, which we
6997 // have ensured will dominate all other instructions.
6998 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6999 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
7000 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
7001 MatchInfo[0]->getOperand(2).getReg(),
7002 MatchInfo[0]->getFlags());
7003
7004 // Replace all of the divs we found with fmul instructions.
7005 for (MachineInstr *MI : MatchInfo) {
7006 Builder.setInsertPt(*MI->getParent(), MI);
7007 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
7008 Div->getOperand(0).getReg(), MI->getFlags());
7009 MI->eraseFromParent();
7010 }
7011}
7012
7013bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) const {
7014 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
7015 Register LHS = MI.getOperand(1).getReg();
7016 Register RHS = MI.getOperand(2).getReg();
7017
7018 // Helper lambda to check for opportunities for
7019 // A + (B - A) -> B
7020 // (B - A) + A -> B
7021 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
7022 Register Reg;
7023 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
7024 Reg == MaybeSameReg;
7025 };
7026 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
7027}
7028
7029bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
7030 Register &MatchInfo) const {
7031 // This combine folds the following patterns:
7032 //
7033 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
7034 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
7035 // into
7036 // x
7037 // if
7038 // k == sizeof(VecEltTy)/2
7039 // type(x) == type(dst)
7040 //
7041 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
7042 // into
7043 // x
7044 // if
7045 // type(x) == type(dst)
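// Illustrative example (editorial sketch) of the first pattern: with
// x:_(<2 x s32>),
//   %cast:_(s64) = G_BITCAST %x
//   %hi:_(s64) = G_LSHR %cast, 32
//   %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %cast, %hi
// reassembles exactly the lanes of %x, so %bv can be replaced by %x.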
7046
7047 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
7048 LLT DstEltTy = DstVecTy.getElementType();
7049
7050 Register Lo, Hi;
7051
7052 if (mi_match(
7053 MI, MRI,
7054 m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
7055 MatchInfo = Lo;
7056 return MRI.getType(MatchInfo) == DstVecTy;
7057 }
7058
7059 std::optional<ValueAndVReg> ShiftAmount;
7060 const auto LoPattern = m_GBitcast(m_Reg(Lo));
7061 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
7062 if (mi_match(
7063 MI, MRI,
7064 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
7065 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
7066 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
7067 MatchInfo = Lo;
7068 return MRI.getType(MatchInfo) == DstVecTy;
7069 }
7070 }
7071
7072 return false;
7073}
7074
7075bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
7076 Register &MatchInfo) const {
7077 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
7078 // if type(x) == type(G_TRUNC)
7079 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7080 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
7081 return false;
7082
7083 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
7084}
7085
7086bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
7087 Register &MatchInfo) const {
7088 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
7089 // y if K == size of vector element type
7090 std::optional<ValueAndVReg> ShiftAmt;
7091 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7092 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
7093 m_GCst(ShiftAmt))))
7094 return false;
7095
7096 LLT MatchTy = MRI.getType(MatchInfo);
7097 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
7098 MatchTy == MRI.getType(MI.getOperand(0).getReg());
7099}
7100
7101unsigned CombinerHelper::getFPMinMaxOpcForSelect(
7102 CmpInst::Predicate Pred, LLT DstTy,
7103 SelectPatternNaNBehaviour VsNaNRetVal) const {
7104 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
7105 "Expected a NaN behaviour?");
7106 // Choose an opcode based on legality or on the behaviour when one of the
7107 // LHS/RHS may be NaN.
7108 switch (Pred) {
7109 default:
7110 return 0;
7111 case CmpInst::FCMP_UGT:
7112 case CmpInst::FCMP_UGE:
7113 case CmpInst::FCMP_OGT:
7114 case CmpInst::FCMP_OGE:
7115 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7116 return TargetOpcode::G_FMAXNUM;
7117 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7118 return TargetOpcode::G_FMAXIMUM;
7119 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
7120 return TargetOpcode::G_FMAXNUM;
7121 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
7122 return TargetOpcode::G_FMAXIMUM;
7123 return 0;
7124 case CmpInst::FCMP_ULT:
7125 case CmpInst::FCMP_ULE:
7126 case CmpInst::FCMP_OLT:
7127 case CmpInst::FCMP_OLE:
7128 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7129 return TargetOpcode::G_FMINNUM;
7130 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7131 return TargetOpcode::G_FMINIMUM;
7132 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
7133 return TargetOpcode::G_FMINNUM;
7134 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
7135 return 0;
7136 return TargetOpcode::G_FMINIMUM;
7137 }
7138}
7139
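// Editorial note (illustrative): given (fcmp olt x, y) where only y is known
// never to be NaN, a NaN x makes the ordered compare false, so a select fed
// by it picks the other (non-NaN) operand; that is the RETURNS_OTHER case
// computed below, while the mirrored situation yields RETURNS_NAN.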
7140CombinerHelper::SelectPatternNaNBehaviour
7141CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
7142 bool IsOrderedComparison) const {
7143 bool LHSSafe = VT->isKnownNeverNaN(LHS);
7144 bool RHSSafe = VT->isKnownNeverNaN(RHS);
7145 // Completely unsafe.
7146 if (!LHSSafe && !RHSSafe)
7147 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
7148 if (LHSSafe && RHSSafe)
7149 return SelectPatternNaNBehaviour::RETURNS_ANY;
7150 // An ordered comparison will return false when given a NaN, so it
7151 // returns the RHS.
7152 if (IsOrderedComparison)
7153 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
7154 : SelectPatternNaNBehaviour::RETURNS_OTHER;
7155 // An unordered comparison will return true when given a NaN, so it
7156 // returns the LHS.
7157 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
7158 : SelectPatternNaNBehaviour::RETURNS_NAN;
7159}
7160
7161bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7162 Register TrueVal, Register FalseVal,
7163 BuildFnTy &MatchInfo) const {
7164 // Match: select (fcmp cond x, y) x, y
7165 // select (fcmp cond x, y) y, x
7166 // And turn it into fminnum/fmaxnum or fmin/fmax based on the condition.
7167 LLT DstTy = MRI.getType(Dst);
7168 // Bail out early on pointers, since we'll never want to fold to a min/max.
7169 if (DstTy.isPointer())
7170 return false;
7171 // Match a floating point compare with a less-than/greater-than predicate.
7172 // TODO: Allow multiple users of the compare if they are all selects.
7173 CmpInst::Predicate Pred;
7174 Register CmpLHS, CmpRHS;
7175 if (!mi_match(Cond, MRI,
7176 m_OneNonDBGUse(
7177 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7178 CmpInst::isEquality(Pred))
7179 return false;
7180 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7181 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7182 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7183 return false;
7184 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7185 std::swap(CmpLHS, CmpRHS);
7186 Pred = CmpInst::getSwappedPredicate(Pred);
7187 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7188 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7189 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7190 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7191 }
7192 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7193 return false;
7194 // Decide what type of max/min this should be based on the predicate.
7195 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7196 if (!Opc || !isLegal({Opc, {DstTy}}))
7197 return false;
7198 // Comparisons between signed zero and zero may have different results...
7199 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7200 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7201 // We don't know if a comparison between two 0s will give us a consistent
7202 // result. Be conservative and only proceed if at least one side is
7203 // non-zero.
7204 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7205 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7206 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7207 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7208 return false;
7209 }
7210 }
7211 MatchInfo = [=](MachineIRBuilder &B) {
7212 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7213 };
7214 return true;
7215}
7216
7217bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
7218 BuildFnTy &MatchInfo) const {
7219 // TODO: Handle integer cases.
7220 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7221 // Condition may be fed by a truncated compare.
7222 Register Cond = MI.getOperand(1).getReg();
7223 Register MaybeTrunc;
7224 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7225 Cond = MaybeTrunc;
7226 Register Dst = MI.getOperand(0).getReg();
7227 Register TrueVal = MI.getOperand(2).getReg();
7228 Register FalseVal = MI.getOperand(3).getReg();
7229 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7230}
7231
7232bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
7233 BuildFnTy &MatchInfo) const {
7234 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7235 // (X + Y) == X --> Y == 0
7236 // (X + Y) != X --> Y != 0
7237 // (X - Y) == X --> Y == 0
7238 // (X - Y) != X --> Y != 0
7239 // (X ^ Y) == X --> Y == 0
7240 // (X ^ Y) != X --> Y != 0
7241 Register Dst = MI.getOperand(0).getReg();
7242 CmpInst::Predicate Pred;
7243 Register X, Y, OpLHS, OpRHS;
7244 bool MatchedSub = mi_match(
7245 Dst, MRI,
7246 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7247 if (MatchedSub && X != OpLHS)
7248 return false;
7249 if (!MatchedSub) {
7250 if (!mi_match(Dst, MRI,
7251 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7252 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7253 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7254 return false;
7255 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7256 }
7257 MatchInfo = [=](MachineIRBuilder &B) {
7258 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7259 B.buildICmp(Pred, Dst, Y, Zero);
7260 };
7261 return CmpInst::isEquality(Pred) && Y.isValid();
7262}
7263
7264/// Return the minimum useless shift amount that results in complete loss of
7265/// the source value. \p Result receives the value such a shift produces, or
/// std::nullopt when that value cannot be determined.
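/// For example (editorial illustration): if KnownBits shows an 8-bit value
/// has at least 5 leading zeros, only 3 significant bits remain, so any
/// G_LSHR by 3 or more always produces 0 and the returned threshold is 3.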
7266static std::optional<unsigned>
7267getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7268 std::optional<int64_t> &Result) {
7269 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7270 Opcode == TargetOpcode::G_ASHR) &&
7271 "Expect G_SHL, G_LSHR or G_ASHR.");
7272 unsigned SignificantBits = 0;
7273 switch (Opcode) {
7274 case TargetOpcode::G_SHL:
7275 SignificantBits = ValueKB.countMinTrailingZeros();
7276 Result = 0;
7277 break;
7278 case TargetOpcode::G_LSHR:
7279 Result = 0;
7280 SignificantBits = ValueKB.countMinLeadingZeros();
7281 break;
7282 case TargetOpcode::G_ASHR:
7283 if (ValueKB.isNonNegative()) {
7284 SignificantBits = ValueKB.countMinLeadingZeros();
7285 Result = 0;
7286 } else if (ValueKB.isNegative()) {
7287 SignificantBits = ValueKB.countMinLeadingOnes();
7288 Result = -1;
7289 } else {
7290 // Cannot determine shift result.
7291 Result = std::nullopt;
7292 }
7293 break;
7294 default:
7295 break;
7296 }
7297 return ValueKB.getBitWidth() - SignificantBits;
7298}
7299
7300bool CombinerHelper::matchShiftsTooBig(
7301 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7302 Register ShiftVal = MI.getOperand(1).getReg();
7303 Register ShiftReg = MI.getOperand(2).getReg();
7304 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7305 auto IsShiftTooBig = [&](const Constant *C) {
7306 auto *CI = dyn_cast<ConstantInt>(C);
7307 if (!CI)
7308 return false;
7309 if (CI->uge(ResTy.getScalarSizeInBits())) {
7310 MatchInfo = std::nullopt;
7311 return true;
7312 }
7313 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7314 MI.getOpcode(), MatchInfo);
7315 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7316 };
7317 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7318}
7319
7320bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) const {
7321 unsigned LHSOpndIdx = 1;
7322 unsigned RHSOpndIdx = 2;
7323 switch (MI.getOpcode()) {
7324 case TargetOpcode::G_UADDO:
7325 case TargetOpcode::G_SADDO:
7326 case TargetOpcode::G_UMULO:
7327 case TargetOpcode::G_SMULO:
7328 LHSOpndIdx = 2;
7329 RHSOpndIdx = 3;
7330 break;
7331 default:
7332 break;
7333 }
7334 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7335 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7336 if (!getIConstantVRegVal(LHS, MRI)) {
7337 // Skip commuting if LHS is not a constant. But, LHS may be a
7338 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7339 // have a constant on the RHS.
7340 if (MRI.getVRegDef(LHS)->getOpcode() !=
7341 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7342 return false;
7343 }
7344 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7345 return MRI.getVRegDef(RHS)->getOpcode() !=
7346 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7347 !getIConstantVRegVal(RHS, MRI);
7348}
7349
7350bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) const {
7351 Register LHS = MI.getOperand(1).getReg();
7352 Register RHS = MI.getOperand(2).getReg();
7353 std::optional<FPValueAndVReg> ValAndVReg;
7354 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7355 return false;
7356 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7357}
7358
7359void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) const {
7360 Observer.changingInstr(MI);
7361 unsigned LHSOpndIdx = 1;
7362 unsigned RHSOpndIdx = 2;
7363 switch (MI.getOpcode()) {
7364 case TargetOpcode::G_UADDO:
7365 case TargetOpcode::G_SADDO:
7366 case TargetOpcode::G_UMULO:
7367 case TargetOpcode::G_SMULO:
7368 LHSOpndIdx = 2;
7369 RHSOpndIdx = 3;
7370 break;
7371 default:
7372 break;
7373 }
7374 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7375 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7376 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7377 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7378 Observer.changedInstr(MI);
7379}
7380
7381bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7382 LLT SrcTy = MRI.getType(Src);
7383 if (SrcTy.isFixedVector())
7384 return isConstantSplatVector(Src, 1, AllowUndefs);
7385 if (SrcTy.isScalar()) {
7386 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7387 return true;
7388 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7389 return IConstant && IConstant->Value == 1;
7390 }
7391 return false; // scalable vector
7392}
7393
7394bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7395 LLT SrcTy = MRI.getType(Src);
7396 if (SrcTy.isFixedVector())
7397 return isConstantSplatVector(Src, 0, AllowUndefs);
7398 if (SrcTy.isScalar()) {
7399 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7400 return true;
7401 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7402 return IConstant && IConstant->Value == 0;
7403 }
7404 return false; // scalable vector
7405}
7406
7407// Ignores COPYs during conformance checks.
7408// FIXME scalable vectors.
7409bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7410 bool AllowUndefs) const {
7411 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7412 if (!BuildVector)
7413 return false;
7414 unsigned NumSources = BuildVector->getNumSources();
7415
7416 for (unsigned I = 0; I < NumSources; ++I) {
7417 GImplicitDef *ImplicitDef =
7418 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
7419 if (ImplicitDef && AllowUndefs)
7420 continue;
7421 if (ImplicitDef && !AllowUndefs)
7422 return false;
7423 std::optional<ValueAndVReg> IConstant =
7424 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7425 if (IConstant && IConstant->Value == SplatValue)
7426 continue;
7427 return false;
7428 }
7429 return true;
7430}
7431
7432// Ignores COPYs during lookups.
7433// FIXME scalable vectors
7434std::optional<APInt>
7435CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7436 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7437 if (IConstant)
7438 return IConstant->Value;
7439
7440 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7441 if (!BuildVector)
7442 return std::nullopt;
7443 unsigned NumSources = BuildVector->getNumSources();
7444
7445 std::optional<APInt> Value = std::nullopt;
7446 for (unsigned I = 0; I < NumSources; ++I) {
7447 std::optional<ValueAndVReg> IConstant =
7448 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7449 if (!IConstant)
7450 return std::nullopt;
7451 if (!Value)
7452 Value = IConstant->Value;
7453 else if (*Value != IConstant->Value)
7454 return std::nullopt;
7455 }
7456 return Value;
7457}
7458
7459// FIXME G_SPLAT_VECTOR
7460bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7461 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7462 if (IConstant)
7463 return true;
7464
7465 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7466 if (!BuildVector)
7467 return false;
7468
7469 unsigned NumSources = BuildVector->getNumSources();
7470 for (unsigned I = 0; I < NumSources; ++I) {
7471 std::optional<ValueAndVReg> IConstant =
7472 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7473 if (!IConstant)
7474 return false;
7475 }
7476 return true;
7477}
7478
7479// TODO: use knownbits to determine zeros
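// Editorial note (illustrative): e.g. (select %c, 7, 6) matches the
// "C1, C1-1" case below and becomes G_ADD (G_ZEXT %c), 6, trading the
// select for an extension and an add.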
7480bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7481 BuildFnTy &MatchInfo) const {
7482 uint32_t Flags = Select->getFlags();
7483 Register Dest = Select->getReg(0);
7484 Register Cond = Select->getCondReg();
7485 Register True = Select->getTrueReg();
7486 Register False = Select->getFalseReg();
7487 LLT CondTy = MRI.getType(Select->getCondReg());
7488 LLT TrueTy = MRI.getType(Select->getTrueReg());
7489
7490 // We only do this combine for scalar boolean conditions.
7491 if (CondTy != LLT::scalar(1))
7492 return false;
7493
7494 if (TrueTy.isPointer())
7495 return false;
7496
7497 // Both are scalars.
7498 std::optional<ValueAndVReg> TrueOpt =
7499 getIConstantVRegValWithLookThrough(True, MRI);
7500 std::optional<ValueAndVReg> FalseOpt =
7501 getIConstantVRegValWithLookThrough(False, MRI);
7502
7503 if (!TrueOpt || !FalseOpt)
7504 return false;
7505
7506 APInt TrueValue = TrueOpt->Value;
7507 APInt FalseValue = FalseOpt->Value;
7508
7509 // select Cond, 1, 0 --> zext (Cond)
7510 if (TrueValue.isOne() && FalseValue.isZero()) {
7511 MatchInfo = [=](MachineIRBuilder &B) {
7512 B.setInstrAndDebugLoc(*Select);
7513 B.buildZExtOrTrunc(Dest, Cond);
7514 };
7515 return true;
7516 }
7517
7518 // select Cond, -1, 0 --> sext (Cond)
7519 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7520 MatchInfo = [=](MachineIRBuilder &B) {
7521 B.setInstrAndDebugLoc(*Select);
7522 B.buildSExtOrTrunc(Dest, Cond);
7523 };
7524 return true;
7525 }
7526
7527 // select Cond, 0, 1 --> zext (!Cond)
7528 if (TrueValue.isZero() && FalseValue.isOne()) {
7529 MatchInfo = [=](MachineIRBuilder &B) {
7530 B.setInstrAndDebugLoc(*Select);
7531 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7532 B.buildNot(Inner, Cond);
7533 B.buildZExtOrTrunc(Dest, Inner);
7534 };
7535 return true;
7536 }
7537
7538 // select Cond, 0, -1 --> sext (!Cond)
7539 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7540 MatchInfo = [=](MachineIRBuilder &B) {
7541 B.setInstrAndDebugLoc(*Select);
7542 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7543 B.buildNot(Inner, Cond);
7544 B.buildSExtOrTrunc(Dest, Inner);
7545 };
7546 return true;
7547 }
7548
7549 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7550 if (TrueValue - 1 == FalseValue) {
7551 MatchInfo = [=](MachineIRBuilder &B) {
7552 B.setInstrAndDebugLoc(*Select);
7553 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7554 B.buildZExtOrTrunc(Inner, Cond);
7555 B.buildAdd(Dest, Inner, False);
7556 };
7557 return true;
7558 }
7559
7560 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7561 if (TrueValue + 1 == FalseValue) {
7562 MatchInfo = [=](MachineIRBuilder &B) {
7563 B.setInstrAndDebugLoc(*Select);
7564 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7565 B.buildSExtOrTrunc(Inner, Cond);
7566 B.buildAdd(Dest, Inner, False);
7567 };
7568 return true;
7569 }
7570
7571 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7572 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7573 MatchInfo = [=](MachineIRBuilder &B) {
7574 B.setInstrAndDebugLoc(*Select);
7575 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7576 B.buildZExtOrTrunc(Inner, Cond);
7577 // The shift amount must be scalar.
7578 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7579 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7580 B.buildShl(Dest, Inner, ShAmtC, Flags);
7581 };
7582 return true;
7583 }
7584
7585 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7586 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7587 MatchInfo = [=](MachineIRBuilder &B) {
7588 B.setInstrAndDebugLoc(*Select);
7589 Register Not = MRI.createGenericVirtualRegister(CondTy);
7590 B.buildNot(Not, Cond);
7591 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7592 B.buildZExtOrTrunc(Inner, Not);
7593 // The shift amount must be scalar.
7594 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7595 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7596 B.buildShl(Dest, Inner, ShAmtC, Flags);
7597 };
7598 return true;
7599 }
7600
7601 // select Cond, -1, C --> or (sext Cond), C
7602 if (TrueValue.isAllOnes()) {
7603 MatchInfo = [=](MachineIRBuilder &B) {
7604 B.setInstrAndDebugLoc(*Select);
7605 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7606 B.buildSExtOrTrunc(Inner, Cond);
7607 B.buildOr(Dest, Inner, False, Flags);
7608 };
7609 return true;
7610 }
7611
7612 // select Cond, C, -1 --> or (sext (not Cond)), C
7613 if (FalseValue.isAllOnes()) {
7614 MatchInfo = [=](MachineIRBuilder &B) {
7615 B.setInstrAndDebugLoc(*Select);
7616 Register Not = MRI.createGenericVirtualRegister(CondTy);
7617 B.buildNot(Not, Cond);
7618 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7619 B.buildSExtOrTrunc(Inner, Not);
7620 B.buildOr(Dest, Inner, True, Flags);
7621 };
7622 return true;
7623 }
7624
7625 return false;
7626}
7627
7628// TODO: use knownbits to determine zeros
7629bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7630 BuildFnTy &MatchInfo) const {
7631 uint32_t Flags = Select->getFlags();
7632 Register DstReg = Select->getReg(0);
7633 Register Cond = Select->getCondReg();
7634 Register True = Select->getTrueReg();
7635 Register False = Select->getFalseReg();
7636 LLT CondTy = MRI.getType(Select->getCondReg());
7637 LLT TrueTy = MRI.getType(Select->getTrueReg());
7638
7639 // Boolean or fixed vector of booleans.
7640 if (CondTy.isScalableVector() ||
7641 (CondTy.isFixedVector() &&
7642 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7643 CondTy.getScalarSizeInBits() != 1)
7644 return false;
7645
7646 if (CondTy != TrueTy)
7647 return false;
7648
7649 // select Cond, Cond, F --> or Cond, F
7650 // select Cond, 1, F --> or Cond, F
7651 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7652 MatchInfo = [=](MachineIRBuilder &B) {
7653 B.setInstrAndDebugLoc(*Select);
7654 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7655 B.buildZExtOrTrunc(Ext, Cond);
7656 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7657 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7658 };
7659 return true;
7660 }
7661
7662 // select Cond, T, Cond --> and Cond, T
7663 // select Cond, T, 0 --> and Cond, T
7664 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7665 MatchInfo = [=](MachineIRBuilder &B) {
7666 B.setInstrAndDebugLoc(*Select);
7667 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7668 B.buildZExtOrTrunc(Ext, Cond);
7669 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7670 B.buildAnd(DstReg, Ext, FreezeTrue);
7671 };
7672 return true;
7673 }
7674
7675 // select Cond, T, 1 --> or (not Cond), T
7676 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7677 MatchInfo = [=](MachineIRBuilder &B) {
7678 B.setInstrAndDebugLoc(*Select);
7679 // First the not.
7680 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7681 B.buildNot(Inner, Cond);
7682 // Then an ext to match the destination register.
7683 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7684 B.buildZExtOrTrunc(Ext, Inner);
7685 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7686 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7687 };
7688 return true;
7689 }
7690
7691 // select Cond, 0, F --> and (not Cond), F
7692 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7693 MatchInfo = [=](MachineIRBuilder &B) {
7694 B.setInstrAndDebugLoc(*Select);
7695 // First the not.
7696 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7697 B.buildNot(Inner, Cond);
7698 // Then an ext to match the destination register.
7699 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7700 B.buildZExtOrTrunc(Ext, Inner);
7701 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7702 B.buildAnd(DstReg, Ext, FreezeFalse);
7703 };
7704 return true;
7705 }
7706
7707 return false;
7708}
7709
7710bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
7711 BuildFnTy &MatchInfo) const {
7712 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7713 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7714
7715 Register DstReg = Select->getReg(0);
7716 Register True = Select->getTrueReg();
7717 Register False = Select->getFalseReg();
7718 LLT DstTy = MRI.getType(DstReg);
7719
7720 if (DstTy.isPointerOrPointerVector())
7721 return false;
7722
7723 // We want to fold the icmp and replace the select.
7724 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7725 return false;
7726
7727 CmpInst::Predicate Pred = Cmp->getCond();
7728 // We need a greater-than or less-than predicate for the
7729 // canonicalization; equality predicates cannot form a min/max.
7730 if (CmpInst::isEquality(Pred))
7731 return false;
7732
7733 Register CmpLHS = Cmp->getLHSReg();
7734 Register CmpRHS = Cmp->getRHSReg();
7735
7736 // We can swap CmpLHS and CmpRHS for a higher hit rate.
7737 if (True == CmpRHS && False == CmpLHS) {
7738 std::swap(CmpLHS, CmpRHS);
7739 Pred = CmpInst::getSwappedPredicate(Pred);
7740 }
7741
7742 // (icmp X, Y) ? X : Y -> integer minmax.
7743 // see matchSelectPattern in ValueTracking.
7744 // Legality between G_SELECT and integer minmax can differ.
7745 if (True != CmpLHS || False != CmpRHS)
7746 return false;
7747
7748 switch (Pred) {
7749 case ICmpInst::ICMP_UGT:
7750 case ICmpInst::ICMP_UGE: {
7751 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7752 return false;
7753 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7754 return true;
7755 }
7756 case ICmpInst::ICMP_SGT:
7757 case ICmpInst::ICMP_SGE: {
7758 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7759 return false;
7760 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7761 return true;
7762 }
7763 case ICmpInst::ICMP_ULT:
7764 case ICmpInst::ICMP_ULE: {
7765 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7766 return false;
7767 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7768 return true;
7769 }
7770 case ICmpInst::ICMP_SLT:
7771 case ICmpInst::ICMP_SLE: {
7772 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7773 return false;
7774 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7775 return true;
7776 }
7777 default:
7778 return false;
7779 }
7780}
7781
7782// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7783bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
7784 BuildFnTy &MatchInfo) const {
7785 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7786 Register DestReg = MI.getOperand(0).getReg();
7787 LLT DestTy = MRI.getType(DestReg);
7788
7789 Register X;
7790 Register Sub0;
7791 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7792 if (mi_match(DestReg, MRI,
7793 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7794 m_GSMax(m_Reg(X), NegPattern),
7795 m_GUMin(m_Reg(X), NegPattern),
7796 m_GUMax(m_Reg(X), NegPattern)))))) {
7797 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7798 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7799 if (isLegal({NewOpc, {DestTy}})) {
7800 MatchInfo = [=](MachineIRBuilder &B) {
7801 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7802 };
7803 return true;
7804 }
7805 }
7806
7807 return false;
7808}
7809
7810bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const {
7811 GSelect *Select = cast<GSelect>(&MI);
7812
7813 if (tryFoldSelectOfConstants(Select, MatchInfo))
7814 return true;
7815
7816 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7817 return true;
7818
7819 return false;
7820}
7821
7822/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7823/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7824/// into a single comparison using range-based reasoning.
7825/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
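/// Illustrative example (editorial sketch): for (x != 5) && (x != 7) the
/// inverted per-icmp ranges are {5} and {7}; they differ only in one bit,
/// so masking that bit away maps both onto {5} and the pair folds to
/// (x & ~2) != 5.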
7826bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7827 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7828 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7829 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7830 Register DstReg = Logic->getReg(0);
7831 Register LHS = Logic->getLHSReg();
7832 Register RHS = Logic->getRHSReg();
7833 unsigned Flags = Logic->getFlags();
7834
7835 // We need a G_ICMP on the LHS register.
7836 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7837 if (!Cmp1)
7838 return false;
7839
7840 // We need a G_ICMP on the RHS register.
7841 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7842 if (!Cmp2)
7843 return false;
7844
7845 // We want to fold the icmps.
7846 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7847 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7848 return false;
7849
7850 APInt C1;
7851 APInt C2;
7852 std::optional<ValueAndVReg> MaybeC1 =
7853 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
7854 if (!MaybeC1)
7855 return false;
7856 C1 = MaybeC1->Value;
7857
7858 std::optional<ValueAndVReg> MaybeC2 =
7859 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
7860 if (!MaybeC2)
7861 return false;
7862 C2 = MaybeC2->Value;
7863
7864 Register R1 = Cmp1->getLHSReg();
7865 Register R2 = Cmp2->getLHSReg();
7866 CmpInst::Predicate Pred1 = Cmp1->getCond();
7867 CmpInst::Predicate Pred2 = Cmp2->getCond();
7868 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7869 LLT CmpOperandTy = MRI.getType(R1);
7870
7871 if (CmpOperandTy.isPointer())
7872 return false;
7873
7874 // We build ands, adds, and constants of type CmpOperandTy.
7875 // They must be legal to build.
7876 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7877 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7878 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7879 return false;
7880
7881 // Look through an add of a constant offset on R1, R2, or both operands.
7882 // This lets us turn the "R + C' < C''" range idiom into a proper range.
7883 std::optional<APInt> Offset1;
7884 std::optional<APInt> Offset2;
7885 if (R1 != R2) {
7886 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7887 std::optional<ValueAndVReg> MaybeOffset1 =
7888 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7889 if (MaybeOffset1) {
7890 R1 = Add->getLHSReg();
7891 Offset1 = MaybeOffset1->Value;
7892 }
7893 }
7894 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7895 std::optional<ValueAndVReg> MaybeOffset2 =
7896 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7897 if (MaybeOffset2) {
7898 R2 = Add->getLHSReg();
7899 Offset2 = MaybeOffset2->Value;
7900 }
7901 }
7902 }
7903
7904 if (R1 != R2)
7905 return false;
7906
7907 // We calculate the icmp ranges, taking the optional offsets into account.
7908 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7909 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7910 if (Offset1)
7911 CR1 = CR1.subtract(*Offset1);
7912
7913 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7914 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7915 if (Offset2)
7916 CR2 = CR2.subtract(*Offset2);
7917
7918 bool CreateMask = false;
7919 APInt LowerDiff;
7920 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7921 if (!CR) {
7922 // We need non-wrapping ranges.
7923 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7924 return false;
7925
7926 // Check whether we have equal-size ranges that only differ by one bit.
7927 // In that case we can apply a mask to map one range onto the other.
7928 LowerDiff = CR1.getLower() ^ CR2.getLower();
7929 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7930 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7931 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7932 CR1Size != CR2.getUpper() - CR2.getLower())
7933 return false;
7934
7935 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7936 CreateMask = true;
7937 }
7938
7939 if (IsAnd)
7940 CR = CR->inverse();
7941
7942 CmpInst::Predicate NewPred;
7943 APInt NewC, Offset;
7944 CR->getEquivalentICmp(NewPred, NewC, Offset);
7945
7946 // We take the result type of one of the original icmps, CmpTy, for the
7947 // icmp to be built. The operand type, CmpOperandTy, is used for the other
7948 // instructions and constants to be built. G_ADD and G_AND take parameters
7949 // and produce results of the same type, but CmpTy and the type of DstReg
7950 // might differ. That is why we zext or trunc the icmp result into the
7951 // destination register.
7952
7953 MatchInfo = [=](MachineIRBuilder &B) {
7954 if (CreateMask && Offset != 0) {
7955 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7956 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7957 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7958 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7959 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7960 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7961 B.buildZExtOrTrunc(DstReg, ICmp);
7962 } else if (CreateMask && Offset == 0) {
7963 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7964 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7965 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7966 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7967 B.buildZExtOrTrunc(DstReg, ICmp);
7968 } else if (!CreateMask && Offset != 0) {
7969 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7970 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7971 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7972 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7973 B.buildZExtOrTrunc(DstReg, ICmp);
7974 } else if (!CreateMask && Offset == 0) {
7975 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7976 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7977 B.buildZExtOrTrunc(DstReg, ICmp);
7978 } else {
7979 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7980 }
7981 };
7982 return true;
7983}
7984
7985bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7986 BuildFnTy &MatchInfo) const {
7987 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7988 Register DestReg = Logic->getReg(0);
7989 Register LHS = Logic->getLHSReg();
7990 Register RHS = Logic->getRHSReg();
7991 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7992
7993 // We need a compare on the LHS register.
7994 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7995 if (!Cmp1)
7996 return false;
7997
7998 // We need a compare on the RHS register.
7999 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
8000 if (!Cmp2)
8001 return false;
8002
8003 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
8004 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
8005
8006 // We build one fcmp as the replacement; it must be legal, the logic op and
8007 // both fcmps must each have a single use, and the fcmps must have the same shape.
8008 if (!isLegalOrBeforeLegalizer(
8009 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
8010 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
8011 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
8012 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
8013 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
8014 return false;
8015
8016 CmpInst::Predicate PredL = Cmp1->getCond();
8017 CmpInst::Predicate PredR = Cmp2->getCond();
8018 Register LHS0 = Cmp1->getLHSReg();
8019 Register LHS1 = Cmp1->getRHSReg();
8020 Register RHS0 = Cmp2->getLHSReg();
8021 Register RHS1 = Cmp2->getRHSReg();
8022
8023 if (LHS0 == RHS1 && LHS1 == RHS0) {
8024 // Swap RHS operands to match LHS.
8025 PredR = CmpInst::getSwappedPredicate(PredR);
8026 std::swap(RHS0, RHS1);
8027 }
8028
8029 if (LHS0 == RHS0 && LHS1 == RHS1) {
8030 // We determine the new predicate.
8031 unsigned CmpCodeL = getFCmpCode(PredL);
8032 unsigned CmpCodeR = getFCmpCode(PredR);
8033 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
8034 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
8035 MatchInfo = [=](MachineIRBuilder &B) {
8036 // The fcmp predicates fill the lower part of the enum.
8037 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
8038 if (Pred == FCmpInst::FCMP_FALSE &&
8039 isConstantLegalOrBeforeLegalizer(CmpTy)) {
8040 auto False = B.buildConstant(CmpTy, 0);
8041 B.buildZExtOrTrunc(DestReg, False);
8042 } else if (Pred == FCmpInst::FCMP_TRUE &&
8043 isConstantLegalOrBeforeLegalizer(CmpTy)) {
8044 auto True =
8045 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
8046 CmpTy.isVector() /*isVector*/,
8047 true /*isFP*/));
8048 B.buildZExtOrTrunc(DestReg, True);
8049 } else { // We take the predicate without predicate optimizations.
8050 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
8051 B.buildZExtOrTrunc(DestReg, Cmp);
8052 }
8053 };
8054 return true;
8055 }
8056
8057 return false;
8058}
8059
8060bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const {
8061 GAnd *And = cast<GAnd>(&MI);
8062
8063 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
8064 return true;
8065
8066 if (tryFoldLogicOfFCmps(And, MatchInfo))
8067 return true;
8068
8069 return false;
8070}
8071
8072bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const {
8073 GOr *Or = cast<GOr>(&MI);
8074
8075 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
8076 return true;
8077
8078 if (tryFoldLogicOfFCmps(Or, MatchInfo))
8079 return true;
8080
8081 return false;
8082}
8083
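// Editorial note (illustrative): besides the constant folds below, the
// overflow bit can often be decided from known bits. E.g. if both s32
// operands of a G_UADDO are known to have their top bit clear, the unsigned
// ranges cannot overflow, and the combine emits a plain G_ADD with the
// nuw flag plus a constant-0 carry.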
8084bool CombinerHelper::matchAddOverflow(MachineInstr &MI,
8085 BuildFnTy &MatchInfo) const {
8086 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
8087
8088 // Addo has no flags
8089 Register Dst = Add->getReg(0);
8090 Register Carry = Add->getReg(1);
8091 Register LHS = Add->getLHSReg();
8092 Register RHS = Add->getRHSReg();
8093 bool IsSigned = Add->isSigned();
8094 LLT DstTy = MRI.getType(Dst);
8095 LLT CarryTy = MRI.getType(Carry);
8096
8097 // Fold addo, if the carry is dead -> add, undef.
8098 if (MRI.use_nodbg_empty(Carry) &&
8099 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
8100 MatchInfo = [=](MachineIRBuilder &B) {
8101 B.buildAdd(Dst, LHS, RHS);
8102 B.buildUndef(Carry);
8103 };
8104 return true;
8105 }
8106
8107 // Canonicalize constant to RHS.
8108 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
8109 if (IsSigned) {
8110 MatchInfo = [=](MachineIRBuilder &B) {
8111 B.buildSAddo(Dst, Carry, RHS, LHS);
8112 };
8113 return true;
8114 }
8115 // !IsSigned
8116 MatchInfo = [=](MachineIRBuilder &B) {
8117 B.buildUAddo(Dst, Carry, RHS, LHS);
8118 };
8119 return true;
8120 }
8121
8122 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
8123 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
8124
8125 // Fold addo(c1, c2) -> c3, carry.
8126 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
8127 isConstantLegalOrBeforeLegalizer(CarryTy)) {
8128 bool Overflow;
8129 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
8130 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
8131 MatchInfo = [=](MachineIRBuilder &B) {
8132 B.buildConstant(Dst, Result);
8133 B.buildConstant(Carry, Overflow);
8134 };
8135 return true;
8136 }
8137
8138 // Fold (addo x, 0) -> x, no carry
8139 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
8140 MatchInfo = [=](MachineIRBuilder &B) {
8141 B.buildCopy(Dst, LHS);
8142 B.buildConstant(Carry, 0);
8143 };
8144 return true;
8145 }
8146
8147 // Given 2 constant operands whose sum does not overflow:
8148 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
8149 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
8150 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
8151 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
8152 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
8153 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
8154 std::optional<APInt> MaybeAddRHS =
8155 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
8156 if (MaybeAddRHS) {
8157 bool Overflow;
8158 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
8159 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8160 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8161 if (IsSigned) {
8162 MatchInfo = [=](MachineIRBuilder &B) {
8163 auto ConstRHS = B.buildConstant(DstTy, NewC);
8164 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8165 };
8166 return true;
8167 }
8168 // !IsSigned
8169 MatchInfo = [=](MachineIRBuilder &B) {
8170 auto ConstRHS = B.buildConstant(DstTy, NewC);
8171 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8172 };
8173 return true;
8174 }
8175 }
8176 }
8177
8178 // We try to combine addo to non-overflowing add.
8179 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8180 !isConstantLegalOrBeforeLegalizer(CarryTy))
8181 return false;
8182
8183 // We try to combine uaddo to non-overflowing add.
8184 if (!IsSigned) {
8185 ConstantRange CRLHS =
8186 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8187 ConstantRange CRRHS =
8188 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8189
8190 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8191 case ConstantRange::OverflowResult::MayOverflow:
8192 return false;
8193 case ConstantRange::OverflowResult::NeverOverflows: {
8194 MatchInfo = [=](MachineIRBuilder &B) {
8195 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8196 B.buildConstant(Carry, 0);
8197 };
8198 return true;
8199 }
8200 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8201 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8202 MatchInfo = [=](MachineIRBuilder &B) {
8203 B.buildAdd(Dst, LHS, RHS);
8204 B.buildConstant(Carry, 1);
8205 };
8206 return true;
8207 }
8208 }
8209 return false;
8210 }
8211
8212 // We try to combine saddo to non-overflowing add.
8213
8214 // If LHS and RHS each have at least two sign bits, then there is no signed
8215 // overflow.
8216 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8217 MatchInfo = [=](MachineIRBuilder &B) {
8218 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8219 B.buildConstant(Carry, 0);
8220 };
8221 return true;
8222 }
8223
8224 ConstantRange CRLHS =
8225 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8226 ConstantRange CRRHS =
8227 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8228
8229 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8230 case ConstantRange::OverflowResult::MayOverflow:
8231 return false;
8232 case ConstantRange::OverflowResult::NeverOverflows: {
8233 MatchInfo = [=](MachineIRBuilder &B) {
8234 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8235 B.buildConstant(Carry, 0);
8236 };
8237 return true;
8238 }
8239 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8240 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8241 MatchInfo = [=](MachineIRBuilder &B) {
8242 B.buildAdd(Dst, LHS, RHS);
8243 B.buildConstant(Carry, 1);
8244 };
8245 return true;
8246 }
8247 }
8248
8249 return false;
8250}
8251
8252void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
8253 BuildFnTy &MatchInfo) const {
8254 MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
8255 MatchInfo(Builder);
8256 Root->eraseFromParent();
8257}
8258
8259bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI,
8260 int64_t Exponent) const {
8261 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8262 return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize);
8263}
8264
8265void CombinerHelper::applyExpandFPowI(MachineInstr &MI,
8266 int64_t Exponent) const {
8267 auto [Dst, Base] = MI.getFirst2Regs();
8268 LLT Ty = MRI.getType(Dst);
8269 int64_t ExpVal = Exponent;
8270
8271 if (ExpVal == 0) {
8272 Builder.buildFConstant(Dst, 1.0);
8273 MI.removeFromParent();
8274 return;
8275 }
8276
8277 if (ExpVal < 0)
8278 ExpVal = -ExpVal;
8279
8280 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8281 // to generate the multiply sequence. There are more optimal ways to do this
8282 // (for example, powi(x,15) generates one more multiply than it should), but
8283 // this has the benefit of being both really simple and much better than a
8284 // libcall.
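 // Worked example (editorial illustration) for Exponent = 13 (0b1101),
 // scanning bits low to high while CurSquare walks x, x^2, x^4, x^8:
 //   bit 0 set:   Res = x
 //   bit 1 clear: (skip)
 //   bit 2 set:   Res = x * x^4 = x^5
 //   bit 3 set:   Res = x^5 * x^8 = x^13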
8285 std::optional<SrcOp> Res;
8286 SrcOp CurSquare = Base;
8287 while (ExpVal > 0) {
8288 if (ExpVal & 1) {
8289 if (!Res)
8290 Res = CurSquare;
8291 else
8292 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8293 }
8294
8295 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8296 ExpVal >>= 1;
8297 }
8298
8299 // If the original exponent was negative, invert the result, producing
8300 // 1/(x*x*x).
8301 if (Exponent < 0)
8302 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8303 MI.getFlags());
8304
8305 Builder.buildCopy(Dst, *Res);
8306 MI.eraseFromParent();
8307}
8308
8309bool CombinerHelper::matchFoldAPlusC1MinusC2(const MachineInstr &MI,
8310 BuildFnTy &MatchInfo) const {
8311 // fold (A+C1)-C2 -> A+(C1-C2)
8312 const GSub *Sub = cast<GSub>(&MI);
8313 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8314
8315 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8316 return false;
8317
8318 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8319 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8320
8321 Register Dst = Sub->getReg(0);
8322 LLT DstTy = MRI.getType(Dst);
8323
8324 MatchInfo = [=](MachineIRBuilder &B) {
8325 auto Const = B.buildConstant(DstTy, C1 - C2);
8326 B.buildAdd(Dst, Add->getLHSReg(), Const);
8327 };
8328
8329 return true;
8330}
8331
8332bool CombinerHelper::matchFoldC2MinusAPlusC1(const MachineInstr &MI,
8333 BuildFnTy &MatchInfo) const {
8334 // fold C2-(A+C1) -> (C2-C1)-A
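 // E.g. 10 - (x + 3) -> 7 - x.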
8335 const GSub *Sub = cast<GSub>(&MI);
8336 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8337
8338 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8339 return false;
8340
8341 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8342 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8343
8344 Register Dst = Sub->getReg(0);
8345 LLT DstTy = MRI.getType(Dst);
8346
8347 MatchInfo = [=](MachineIRBuilder &B) {
8348 auto Const = B.buildConstant(DstTy, C2 - C1);
8349 B.buildSub(Dst, Const, Add->getLHSReg());
8350 };
8351
8352 return true;
8353}
8354
8355bool CombinerHelper::matchFoldAMinusC1MinusC2(const MachineInstr &MI,
8356 BuildFnTy &MatchInfo) const {
8357 // fold (A-C1)-C2 -> A-(C1+C2)
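 // E.g. (x - 10) - 3 -> x - 13.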
8358 const GSub *Sub1 = cast<GSub>(&MI);
8359 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8360
8361 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8362 return false;
8363
8364 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8365 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8366
8367 Register Dst = Sub1->getReg(0);
8368 LLT DstTy = MRI.getType(Dst);
8369
8370 MatchInfo = [=](MachineIRBuilder &B) {
8371 auto Const = B.buildConstant(DstTy, C1 + C2);
8372 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8373 };
8374
8375 return true;
8376}
8377
8378bool CombinerHelper::matchFoldC1Minus2MinusC2(const MachineInstr &MI,
8379 BuildFnTy &MatchInfo) const {
8380 // fold (C1-A)-C2 -> (C1-C2)-A
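 // E.g. (10 - x) - 3 -> 7 - x.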
8381 const GSub *Sub1 = cast<GSub>(&MI);
8382 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8383
8384 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8385 return false;
8386
8387 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8388 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8389
8390 Register Dst = Sub1->getReg(0);
8391 LLT DstTy = MRI.getType(Dst);
8392
8393 MatchInfo = [=](MachineIRBuilder &B) {
8394 auto Const = B.buildConstant(DstTy, C1 - C2);
8395 B.buildSub(Dst, Const, Sub2->getRHSReg());
8396 };
8397
8398 return true;
8399}
8400
8401bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI,
8402 BuildFnTy &MatchInfo) const {
8403 // fold ((A-C1)+C2) -> (A+(C2-C1))
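 // E.g. (x - 3) + 10 -> x + 7.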
8404 const GAdd *Add = cast<GAdd>(&MI);
8405 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8406
8407 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8408 return false;
8409
8410 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8411 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8412
8413 Register Dst = Add->getReg(0);
8414 LLT DstTy = MRI.getType(Dst);
8415
8416 MatchInfo = [=](MachineIRBuilder &B) {
8417 auto Const = B.buildConstant(DstTy, C2 - C1);
8418 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8419 };
8420
8421 return true;
8422}
8423
8424bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(
8425 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8426 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8427
8428 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8429 return false;
8430
8431 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8432
8433 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8434
8435 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8436 // $any:_(<8 x s16>) = G_ANYEXT $bv
8437 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8438 //
8439 // ->
8440 //
8441 // $any:_(s16) = G_ANYEXT $bv[0]
8442 // $any1:_(s16) = G_ANYEXT $bv[1]
8443 // $any2:_(s16) = G_ANYEXT $bv[2]
8444 // $any3:_(s16) = G_ANYEXT $bv[3]
8445 // $any4:_(s16) = G_ANYEXT $bv[4]
8446 // $any5:_(s16) = G_ANYEXT $bv[5]
8447 // $any6:_(s16) = G_ANYEXT $bv[6]
8448 // $any7:_(s16) = G_ANYEXT $bv[7]
8449 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8450 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8451
8452 // We want to unmerge into vectors.
8453 if (!DstTy.isFixedVector())
8454 return false;
8455
8456 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8457 if (!Any)
8458 return false;
8459
8460 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8461
8462 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8463 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8464
8465 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8466 return false;
8467
8468 // FIXME: check element types?
8469 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8470 return false;
8471
8472 LLT BigBvTy = MRI.getType(BV->getReg(0));
8473 LLT SmallBvTy = DstTy;
8474 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8475
8476 if (!isLegalOrBeforeLegalizer(
8477 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8478 return false;
8479
8480 // We check the legality of scalar anyext.
8481 if (!isLegalOrBeforeLegalizer(
8482 {TargetOpcode::G_ANYEXT,
8483 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8484 return false;
8485
8486 MatchInfo = [=](MachineIRBuilder &B) {
8487 // Build into each G_UNMERGE_VALUES def
8488 // a small build vector with anyexts from the source build vector.
8489 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8490 SmallVector<Register> Ops;
8491 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8492 Register SourceArray =
8493 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8494 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8495 Ops.push_back(AnyExt.getReg(0));
8496 }
8497 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8498 };
8499 };
8500 return true;
8501 };
8502
8503 return false;
8504}
8505
8506bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
8507 BuildFnTy &MatchInfo) const {
8508
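 // Mask indices in [NumSrcElems, 2 * NumSrcElems) select lanes of the second
 // source. This combine only fires when that source is undef, so those lanes
 // read undef and can be rewritten as the explicit undef index -1. E.g. with
 // <4 x s32> sources, mask [0, 5, 2, 7] becomes [0, -1, 2, -1].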
8509 bool Changed = false;
8510 auto &Shuffle = cast<GShuffleVector>(MI);
8511 ArrayRef<int> OrigMask = Shuffle.getMask();
8512 SmallVector<int, 16> NewMask;
8513 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8514 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8515 const unsigned NumDstElts = OrigMask.size();
8516 for (unsigned i = 0; i != NumDstElts; ++i) {
8517 int Idx = OrigMask[i];
8518 if (Idx >= (int)NumSrcElems) {
8519 Idx = -1;
8520 Changed = true;
8521 }
8522 NewMask.push_back(Idx);
8523 }
8524
8525 if (!Changed)
8526 return false;
8527
8528 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8529 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8530 std::move(NewMask));
8531 };
8532
8533 return true;
8534}
8535
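// E.g. with NumElems = 4, mask [0, 5, 2, 7] becomes [4, 1, 6, 3]: indices
// into the first source now name the second and vice versa.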
8536static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8537 const unsigned MaskSize = Mask.size();
8538 for (unsigned I = 0; I < MaskSize; ++I) {
8539 int Idx = Mask[I];
8540 if (Idx < 0)
8541 continue;
8542
8543 if (Idx < (int)NumElems)
8544 Mask[I] = Idx + NumElems;
8545 else
8546 Mask[I] = Idx - NumElems;
8547 }
8548}
8549
8550bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
8551 BuildFnTy &MatchInfo) const {
8552
8553 auto &Shuffle = cast<GShuffleVector>(MI);
8554 // If either of the two inputs is already undef, don't check the mask
8555 // again to prevent an infinite loop.
8556 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8557 return false;
8558
8559 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8560 return false;
8561
8562 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8563 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8564 if (!isLegalOrBeforeLegalizer(
8565 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8566 return false;
8567
8568 ArrayRef<int> Mask = Shuffle.getMask();
8569 const unsigned NumSrcElems = Src1Ty.getNumElements();
8570
8571 bool TouchesSrc1 = false;
8572 bool TouchesSrc2 = false;
8573 const unsigned NumElems = Mask.size();
8574 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8575 if (Mask[Idx] < 0)
8576 continue;
8577
8578 if (Mask[Idx] < (int)NumSrcElems)
8579 TouchesSrc1 = true;
8580 else
8581 TouchesSrc2 = true;
8582 }
8583
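 // A mask reading both sources (or neither) has nothing to fold. If only the
 // second source is read, commute the mask so the live input becomes the
 // first operand; either way the unread source is then replaced by undef.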
8584 if (TouchesSrc1 == TouchesSrc2)
8585 return false;
8586
8587 Register NewSrc1 = Shuffle.getSrc1Reg();
8588 SmallVector<int, 16> NewMask(Mask);
8589 if (TouchesSrc2) {
8590 NewSrc1 = Shuffle.getSrc2Reg();
8591 commuteMask(NewMask, NumSrcElems);
8592 }
8593
8594 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8595 auto Undef = B.buildUndef(Src1Ty);
8596 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8597 };
8598
8599 return true;
8600}
8601
8602bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
8603 BuildFnTy &MatchInfo) const {
8604 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8605
8606 Register Dst = Subo->getReg(0);
8607 Register LHS = Subo->getLHSReg();
8608 Register RHS = Subo->getRHSReg();
8609 Register Carry = Subo->getCarryOutReg();
8610 LLT DstTy = MRI.getType(Dst);
8611 LLT CarryTy = MRI.getType(Carry);
8612
8613 // Check legality before known bits.
8614 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8615 !isConstantLegalOrBeforeLegalizer(CarryTy))
8616 return false;
8617
8618 ConstantRange KBLHS =
8619 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8620 /* IsSigned=*/Subo->isSigned());
8621 ConstantRange KBRHS =
8622 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8623 /* IsSigned=*/Subo->isSigned());
8624
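 // E.g. for G_USUBO, ranges proving LHS u>= RHS mean the subtraction can
 // never borrow: emit a plain G_SUB with a carry-out of 0. Ranges proving
 // LHS u< RHS mean it always borrows, and the carry-out becomes the target's
 // "true" value below.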
8625 if (Subo->isSigned()) {
8626 // G_SSUBO
8627 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8628 case ConstantRange::OverflowResult::MayOverflow:
8629 return false;
8630 case ConstantRange::OverflowResult::NeverOverflows: {
8631 MatchInfo = [=](MachineIRBuilder &B) {
8632 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8633 B.buildConstant(Carry, 0);
8634 };
8635 return true;
8636 }
8637 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8638 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8639 MatchInfo = [=](MachineIRBuilder &B) {
8640 B.buildSub(Dst, LHS, RHS);
8641 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8642 /*isVector=*/CarryTy.isVector(),
8643 /*isFP=*/false));
8644 };
8645 return true;
8646 }
8647 }
8648 return false;
8649 }
8650
8651 // G_USUBO
8652 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8653 case ConstantRange::OverflowResult::MayOverflow:
8654 return false;
8655 case ConstantRange::OverflowResult::NeverOverflows: {
8656 MatchInfo = [=](MachineIRBuilder &B) {
8657 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8658 B.buildConstant(Carry, 0);
8659 };
8660 return true;
8661 }
8662 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8663 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8664 MatchInfo = [=](MachineIRBuilder &B) {
8665 B.buildSub(Dst, LHS, RHS);
8666 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8667 /*isVector=*/CarryTy.isVector(),
8668 /*isFP=*/false));
8669 };
8670 return true;
8671 }
8672 }
8673
8674 return false;
8675}
8676
8677// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
8678// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x)
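// Why the folds hold: t = xor(x, sra(x, bitwidth-1)) XORs x with 0 (x >= 0)
// or -1 (x < 0), so the sign bit and every redundant copy of it become
// leading zeros of t, giving ctlz(t) == ctls(x) + 1. The second form,
// or(shl(t, 1), 1), shifts out exactly one of those leading zeros and forces
// the value nonzero (covering t == 0, i.e. x == 0 or x == -1), so its ctlz
// is ctls(x) directly.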
8679bool CombinerHelper::matchCtls(MachineInstr &CtlzMI,
8680 BuildFnTy &MatchInfo) const {
8681 assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
8682 CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON) &&
8683 "Expected G_CTLZ variant");
8684
8685 const Register Dst = CtlzMI.getOperand(0).getReg();
8686 Register Src = CtlzMI.getOperand(1).getReg();
8687
8688 LLT Ty = MRI.getType(Dst);
8689 LLT SrcTy = MRI.getType(Src);
8690
8691 if (!(Ty.isValid() && Ty.isScalar()))
8692 return false;
8693
8694 if (!LI)
8695 return false;
8696
8697 SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
8698 LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);
8699
8700 switch (LI->getAction(Query).Action) {
8701 default:
8702 return false;
8706 break;
8707 }
8708
8709 // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False
8710 Register V;
8711 bool NeedAdd = true;
8712 if (mi_match(Src, MRI,
8714 m_SpecificICst(1))))) {
8715 NeedAdd = false;
8716 Src = V;
8717 }
8718
8719 unsigned BitWidth = Ty.getScalarSizeInBits();
8720
8721 Register X;
8722 if (!mi_match(Src, MRI,
8725 m_SpecificICst(BitWidth - 1)))))))
8726 return false;
8727
8728 MatchInfo = [=](MachineIRBuilder &B) {
8729 if (!NeedAdd) {
8730 B.buildCTLS(Dst, X);
8731 return;
8732 }
8733
8734 auto Ctls = B.buildCTLS(Ty, X);
8735 auto One = B.buildConstant(Ty, 1);
8736
8737 B.buildAdd(Dst, Ctls, One);
8738 };
8739
8740 return true;
8741}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
const fltSemantics & getSemantics() const
Definition APFloat.h:1546
bool isNaN() const
Definition APFloat.h:1536
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1294
APInt bitcastToAPInt() const
Definition APFloat.h:1430
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1806
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
const TargetInstrInfo * TII
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Tranform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool isLegalOrHasFewerElements(const LegalityQuery &Query) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
bool matchConstantFoldUnaryIntOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Constant fold a unary integer op (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON variants,...
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool matchOperandIsKnownToBeAPowerOfTwo(const MachineOperand &MO, bool OrNegative=false) const
Check if operand MO is known to be a power of 2.
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given an G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifySRemByPow2(MachineInstr &MI) const
Combine G_SREM x, (+/-2^k) to a bias-and-mask sequence.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchBinopWithNeg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold a bitwiseop (~b +/- c) -> a bitwiseop ~(b -/+ c)
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given an G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValue() const
Definition Constants.h:464
const APFloat & getValueAPF() const
Definition Constants.h:463
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
constexpr bool isPointerOrPointerVector() const
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
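The MachineInstrBuilder entries above compose naturally: each build* call inserts an instruction and returns a builder whose getReg(0) is the destination vreg. A small sketch (buildDecrement is a hypothetical helper name; it assumes MIB already has a valid insertion point):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Emit X - 1 with the same type as X.
static Register buildDecrement(MachineIRBuilder &MIB, Register X) {
  LLT Ty = MIB.getMRI()->getType(X);
  auto One = MIB.buildConstant(Ty, 1);       // %one:_(Ty) = G_CONSTANT 1
  return MIB.buildSub(Ty, X, One).getReg(0); // %res:_(Ty) = G_SUB %x, %one
}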
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
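The MachineInstr accessors above (getOpcode, getNumOperands, getOperand) are the basic vocabulary of every match function in this file. An illustrative predicate (isRegOnlyAdd is a hypothetical helper name):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// True for a G_ADD whose operands are all register operands.
static bool isRegOnlyAdd(const MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::G_ADD)
    return false;
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
    if (!MI.getOperand(I).isReg())
      return false;
  return true;
}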
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
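The MachineRegisterInfo queries above pair a def lookup with a walk over non-debug users, a pattern many combines rely on before moving or erasing instructions. A sketch (allUsesInDefBlock is a hypothetical helper name):

#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Do all non-debug users of Reg live in the block that defines it?
static bool allUsesInDefBlock(Register Reg, const MachineRegisterInfo &MRI) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (!Def)
    return false;
  for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(Reg))
    if (UseMI.getParent() != Def->getParent())
      return false;
  return true;
}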
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
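TypeSize distinguishes fixed from scalable quantities: for a scalable type only the compile-time minimum is known. A minimal sketch (typeSizeExample is a hypothetical function name):

#include <cassert>
#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;

// Fixed <4 x s32> has exactly 128 bits; scalable <vscale x 4 x s32>
// has a known minimum of 128 bits (128 * vscale at run time).
void typeSizeExample() {
  TypeSize FS = LLT::fixed_vector(4, 32).getSizeInBits();
  TypeSize SS = LLT::scalable_vector(4, 32).getSizeInBits();
  assert(!FS.isScalable() && FS.getFixedValue() == 128);
  assert(SS.isScalable() && SS.getKnownMinValue() == 128);
  assert(SS.isKnownMultipleOf(32));
}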
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
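The MIPatternMatch combinators above are consumed through mi_match, which walks the vreg def chain via MRI and binds sub-patterns on success. A sketch of the idiom (matchShlByConstant is a hypothetical helper name):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// Does Reg compute X << C for some register X and integer constant C?
// On success, X and C are bound to the matched operands.
static bool matchShlByConstant(Register Reg, const MachineRegisterInfo &MRI,
                               Register &X, APInt &C) {
  return mi_match(Reg, MRI, m_GShl(m_Reg(X), m_ICst(C)));
}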
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:557
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1444
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1984
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:653
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:461
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
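getIConstantVRegVal is the usual entry point for constant folding in the combiner: query both operands, fold only if both are known. A sketch (foldAddOfConstants is a hypothetical helper name):

#include <optional>
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// If both operands are defined by G_CONSTANTs, compute the wrapped sum.
static std::optional<APInt>
foldAddOfConstants(Register LHS, Register RHS,
                   const MachineRegisterInfo &MRI) {
  auto L = getIConstantVRegVal(LHS, MRI);
  auto R = getIConstantVRegVal(RHS, MRI);
  if (!L || !R)
    return std::nullopt;
  return *L + *R; // APInt arithmetic wraps, matching G_ADD semantics
}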
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1404
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1569
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:741
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1527
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1551
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1584
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1616
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:672
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1507
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:200
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1437
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:908
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1540
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1641
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:469
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI SmallVector< APInt > ConstantFoldUnaryIntOp(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI)
Tries to constant fold a unary integer operation (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON vari...
Definition Utils.cpp:945
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:501
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
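Worked instances of the bit-math helpers listed above; these are constexpr, so the facts can be checked at compile time:

#include <cstdint>
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

static_assert(llvm::isPowerOf2_32(64), "64 == 1u << 6");
static_assert(!llvm::isPowerOf2_32(0), "a power of two must be > 0");
static_assert(llvm::isMask_64(0xFF), "a run of ones starting at bit 0");
static_assert(!llvm::isMask_64(0xF0), "the run must start at bit 0");
static_assert(llvm::maskTrailingOnes<uint64_t>(8) == 0xFF,
              "the N right-most bits set");
static_assert(llvm::has_single_bit(32u), "exactly one bit set");
// countr_one(0x0F) == 4: four consecutive ones from the least
// significant bit up to the first zero.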
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1422
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:229
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
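A KnownBits value tracks, per bit, whether the bit is known zero, known one, or unknown; the queries above derive facts from that. An illustrative instance (knownBitsExample is a hypothetical function name):

#include <cassert>
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// An 8-bit value known to match 0b00000 1??: bits 7..3 known zero,
// bit 2 known one, bits 1..0 unknown.
void knownBitsExample() {
  KnownBits Known(8);
  Known.Zero = APInt(8, 0xF8); // bits 7..3 are zero
  Known.One = APInt(8, 0x04);  // bit 2 is one
  assert(Known.isNonNegative());              // sign bit is a known zero
  assert(Known.countMinLeadingZeros() == 5);  // bits 7..3
  assert(Known.countMinTrailingZeros() == 0); // bit 0 is unknown
  assert(Known.getMaxValue() == 7);           // unknown bits maximized
}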
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
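These structs compute the multiplier and shift amounts behind the classic multiply-by-magic-number lowering of division. A hand-worked instance of the transform for N / 7 at 32 bits, following the "add" form from Hacker's Delight (section 10-8); this sketches the technique only and does not use the struct's actual field layout:

#include <cstdint>

// Magic = ceil(2^35 / 7) - 2^32 = 0x24924925: take the high half of
// the widened multiply, fix up with an add, then shift.
static uint32_t udiv7(uint32_t N) {
  uint32_t Q = (uint64_t)N * 0x24924925u >> 32; // mulhi(N, Magic)
  return (((N - Q) >> 1) + Q) >> 2;
}

// Sanity check: udiv7(0xFFFFFFFFu) == 0xFFFFFFFFu / 7 == 613566756.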